diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /library/cpp/on_disk/st_hash | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz |
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'library/cpp/on_disk/st_hash')
-rw-r--r-- | library/cpp/on_disk/st_hash/fake.cpp | 4 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/save_stl.h | 84 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/static_hash.h | 420 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/static_hash_map.h | 59 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/sthash_iterators.h | 334 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/ya.make | 15 |
6 files changed, 916 insertions, 0 deletions
diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp new file mode 100644 index 0000000000..ef5af4d432 --- /dev/null +++ b/library/cpp/on_disk/st_hash/fake.cpp @@ -0,0 +1,4 @@ +#include "save_stl.h" +#include "static_hash.h" +#include "static_hash_map.h" +#include "sthash_iterators.h" diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h new file mode 100644 index 0000000000..00f8f0e20d --- /dev/null +++ b/library/cpp/on_disk/st_hash/save_stl.h @@ -0,0 +1,84 @@ +#pragma once + +#include <util/generic/hash.h> +#include <util/system/yassert.h> +#include <util/stream/output.h> + +// this structure might be replaced with sthashtable class +template <class HF, class Eq, class size_type> +struct sthashtable_nvm_sv { + sthashtable_nvm_sv() { + if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { + memset(this, 0, sizeof(sthashtable_nvm_sv)); + } + } + + sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd) + : sthashtable_nvm_sv() + { + hf = phf; + eq = peq; + num_buckets = pnb; + num_elements = pne; + data_end_off = pnd; + } + + HF hf; + Eq eq; + size_type num_buckets; + size_type num_elements; + size_type data_end_off; +}; + +/** + * Some hack to save both THashMap and sthash. + * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method. + */ +template <class V, class K, class HF, class Ex, class Eq, class A> +template <class KeySaver> +inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const { + Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); + typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type; + sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0}; + // to do: m.b. use just the size of corresponding object? + typename KeySaver::TSizeType cur_off = sizeof(sv_type) + + (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType); + sv.data_end_off = cur_off; + const_iterator n; + for (n = begin(); n != end(); ++n) { + sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n)); + } + typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; + if (stHash) + sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); + //saver.Align(sizeof(char*)); + stream->Write(&sv, sizeof(sv)); + + size_type i; + //save vector + for (i = 0; i < buckets.size(); ++i) { + node* cur = buckets[i]; + stream->Write(&cur_off, sizeof(cur_off)); + if (cur) { + while (!((uintptr_t)cur & 1)) { + cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val)); + cur = cur->next; + } + } + if (stHash) + cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); + } + stream->Write(&cur_off, sizeof(cur_off)); // end mark + for (i = 0; i < buckets.size(); ++i) { + node* cur = buckets[i]; + if (cur) { + while (!((uintptr_t)cur & 1)) { + ks.SaveRecord(stream, cur->val); + cur = cur->next; + } + } + if (stHash) + stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); + } + return 0; +} diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h new file mode 100644 index 0000000000..ca7a6ccd36 --- /dev/null +++ b/library/cpp/on_disk/st_hash/static_hash.h @@ -0,0 +1,420 @@ +#pragma once + +#include "save_stl.h" +#include "sthash_iterators.h" + +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/generic/buffer.h> +#include <util/generic/cast.h> +#include <util/generic/yexception.h> // for save/load only +#include <util/stream/file.h> +#include <util/stream/buffer.h> +#include <utility> + +#include <memory> +#include <algorithm> +#include <functional> + +#include <cstdlib> +#include <cstddef> + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible' +#endif + +template <class HashType, class KeySaver> +inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) { + KeySaver ks; + if (hash.save_for_st(stream, ks)) + ythrow yexception() << "Could not save hash to stream"; +} + +template <class HashType> +inline void SaveHashToStream(HashType& hash, IOutputStream* stream) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + return SaveHashToStreamEx<HashType, KeySaver>(hash, stream); +} + +template <class HashType, class KeySaver> +inline void SaveHashToFileEx(HashType& hash, const char* fileName) { + TFileOutput output(fileName); + SaveHashToStreamEx<HashType, KeySaver>(hash, &output); +} + +template <class HashType> +inline void SaveHashToFile(HashType& hash, const char* fileName) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType> +inline void SaveHashSetToFile(HashType& hash, const char* fileName) { + typedef TSthashSetWriter<typename HashType::key_type, ui64> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType> +inline void SaveHashToFile32(HashType& hash, const char* fileName) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType, class KeySaver> +inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) { + TBufferOutput stream(buffer); + KeySaver ks; + if (hash.save_for_st(&stream, ks, stHash)) + ythrow yexception() << "Could not save hash to memory"; +} + +template <class HashType> +inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); +} + +/** + * Some hack to save both THashMap and sthash. + * THashMap and sthash must have same bucket_count(). + */ +template <class HashType, class StHashType> +inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH; + + SH sh = reinterpret_cast<SH>(stHash); + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh); +} + +template <class HashType> +inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); +} + +template <class Iter, typename size_type_f = ui64> +class sthashtable { +public: + typedef typename Iter::TKeyType key_type; + typedef typename Iter::TValueType value_type; + typedef typename Iter::THasherType hasher; + typedef typename Iter::TKeyEqualType key_equal; + + typedef size_type_f size_type; + typedef ptrdiff_t difference_type; + typedef const value_type* const_pointer; + typedef const value_type& const_reference; + + typedef Iter const_iterator; + + const hasher hash_funct() const { + return hash; + } + const key_equal key_eq() const { + return equals; + } + +private: + const hasher hash; + const key_equal equals; + +private: + const_iterator iter_at_bucket(size_type bucket) const { + return (const_iterator)(((char*)this + buckets()[bucket])); + } + + const_iterator iter_at_bucket_or_end(size_type bucket) const { + if (bucket < num_buckets) + return (const_iterator)(((char*)this + buckets()[bucket])); + else + return end(); + } + + const size_type num_buckets; + const size_type num_elements; + const size_type data_end_off; + +protected: //shut up gcc warning + // we can't construct/destroy this object at all! + sthashtable(); + sthashtable(const sthashtable& ht); + ~sthashtable(); + +public: + // const size_type *buckets; + const size_type* buckets() const { + return (size_type*)((char*)this + sizeof(*this)); + } + const size_type buckets(size_type n) const { + return buckets()[n]; + } + + size_type size() const { + return num_elements; + } + size_type max_size() const { + return size_type(-1); + } + bool empty() const { + return size() == 0; + } + + const_iterator begin() const { + return num_buckets ? iter_at_bucket(0) : end(); + } + + const_iterator end() const { + return (const_iterator)(((char*)this + data_end_off)); + } + +public: + size_type size_in_bytes() const { + return data_end_off; + } + + size_type bucket_count() const { + return num_buckets; + } + + size_type elems_in_bucket(size_type bucket) const { + size_type result = 0; + const_iterator first = iter_at_bucket(bucket); + const_iterator last = iter_at_bucket_or_end(bucket + 1); + + for (; first != last; ++first) + ++result; + return result; + } + + template <class TheKey> + const_iterator find(const TheKey& key) const { + size_type n = bkt_num_key(key); + const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1)); + for (; + first != last && !first.KeyEquals(equals, key); + ++first) { + } + if (first != last) + return first; + return end(); + } + + size_type count(const key_type& key) const { + const size_type n = bkt_num_key(key); + size_type result = 0; + const_iterator first = iter_at_bucket(n); + const_iterator last = iter_at_bucket_or_end(n + 1); + + for (; first != last; ++first) + if (first.KeyEquals(equals, key)) + ++result; + return result; + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const; + +private: + template <class TheKey> + size_type bkt_num_key(const TheKey& key) const { + return hash(key) % num_buckets; + } +}; + +template <class I, class size_type_f> +std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const { + typedef std::pair<const_iterator, const_iterator> pii; + const size_type n = bkt_num_key(key); + const_iterator first = iter_at_bucket(n); + const_iterator last = iter_at_bucket_or_end(n + 1); + + for (; first != last; ++first) { + if (first.KeyEquals(equals, key)) { + const_iterator cur = first; + ++cur; + for (; cur != last; ++cur) + if (!cur.KeyEquals(equals, key)) + return pii(const_iterator(first), + const_iterator(cur)); + return pii(const_iterator(first), + const_iterator(last)); + } + } + return pii(end(), end()); +} + +/* end __SGI_STL_HASHTABLE_H */ + +template <class Key, class T, class HashFcn /*= hash<Key>*/, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash { +private: + typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht; + ht rep; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef T mapped_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator const_iterator; + + const hasher hash_funct() const { + return rep.hash_funct(); + } + const key_equal key_eq() const { + return rep.key_eq(); + } + +public: + size_type size() const { + return rep.size(); + } + size_type max_size() const { + return rep.max_size(); + } + bool empty() const { + return rep.empty(); + } + + const_iterator begin() const { + return rep.begin(); + } + const_iterator end() const { + return rep.end(); + } + +public: + template <class TheKey> + const_iterator find(const TheKey& key) const { + return rep.find(key); + } + template <class TheKey> + bool has(const TheKey& key) const { + return rep.find(key) != rep.end(); + } + + size_type count(const key_type& key) const { + return rep.count(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return rep.equal_range(key); + } + + size_type size_in_bytes() const { + return rep.size_in_bytes(); + } + + size_type bucket_count() const { + return rep.bucket_count(); + } + size_type max_bucket_count() const { + return rep.max_bucket_count(); + } + size_type elems_in_bucket(size_type n) const { + return rep.elems_in_bucket(n); + } + + const size_type* buckets() const { + return rep.buckets(); + } + const size_type buckets(size_type n) const { + return rep.buckets()[n]; + } +}; + +template <class Key, class HashFcn, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { + typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base; + +public: + using Base::const_iterator; + using Base::hasher; + using Base::key_equal; + using Base::key_type; + using Base::size_type; + using Base::value_type; +}; + +template <class Key, class T, class HashFcn /*= hash<Key>*/, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash_mm { +private: + typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht; + ht rep; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef T mapped_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator const_iterator; + + const hasher hash_funct() const { + return rep.hash_funct(); + } + const key_equal key_eq() const { + return rep.key_eq(); + } + +public: + size_type size() const { + return rep.size(); + } + size_type max_size() const { + return rep.max_size(); + } + bool empty() const { + return rep.empty(); + } + + const_iterator begin() const { + return rep.begin(); + } + const_iterator end() const { + return rep.end(); + } + + const_iterator find(const key_type& key) const { + return rep.find(key); + } + + size_type count(const key_type& key) const { + return rep.count(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return rep.equal_range(key); + } + + size_type bucket_count() const { + return rep.bucket_count(); + } + size_type max_bucket_count() const { + return rep.max_bucket_count(); + } + size_type elems_in_bucket(size_type n) const { + return rep.elems_in_bucket(n); + } +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h new file mode 100644 index 0000000000..5dc50abd39 --- /dev/null +++ b/library/cpp/on_disk/st_hash/static_hash_map.h @@ -0,0 +1,59 @@ +#pragma once + +#include "static_hash.h" + +#include <library/cpp/deprecated/mapped_file/mapped_file.h> + +#include <util/system/filemap.h> + +template <class SH> +struct sthash_mapped_c { + typedef SH H; + typedef typename H::const_iterator const_iterator; + TMappedFile M; + H* hsh; + sthash_mapped_c() + : M() + , hsh(nullptr) + { + } + sthash_mapped_c(const char* fname, bool precharge) + : M() + , hsh(nullptr) + { + Open(fname, precharge); + } + void Open(const char* fname, bool precharge) { + M.init(fname); + if (precharge) + M.precharge(); + hsh = (H*)M.getData(); + if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != hsh->end().Data - (char*)hsh) + ythrow yexception() << "Could not map hash: " << fname << " is damaged"; + } + H* operator->() { + return hsh; + } + const H* operator->() const { + return hsh; + } + H* GetSthash() { + return hsh; + } + const H* GetSthash() const { + return hsh; + } +}; + +template <class Key, class T, class Hash> +struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> { + typedef sthash<Key, T, Hash> H; + sthash_mapped(const char* fname, bool precharge) + : sthash_mapped_c<H>(fname, precharge) + { + } + sthash_mapped() + : sthash_mapped_c<H>() + { + } +}; diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h new file mode 100644 index 0000000000..6a9ebdd6c3 --- /dev/null +++ b/library/cpp/on_disk/st_hash/sthash_iterators.h @@ -0,0 +1,334 @@ +#pragma once + +#include "save_stl.h" + +#include <util/system/align.h> + +/** + This file provides functionality for saving some relatively simple THashMap object + to disk in a form that can be mapped read-only (via mmap) at any address. + That saved object is accessed via pointer to sthash object (that must have + the same parameters as original THashMap object) + + If either key or value are variable-sized (i.e. contain pointers), user must + write his own instantiation of TSthashIterator (read iterator for sthash) and + TSthashWriter (write iterator for THashMap). + An example for <const char *, B> pair is in here. +**/ + +// TEmptyValue and SizeOfEx are helpers for sthash_set +struct TEmptyValue { + TEmptyValue() = default; +}; + +template <class T> +inline size_t SizeOfEx() { + return sizeof(T); +} + +template <> +inline size_t SizeOfEx<TEmptyValue>() { + return 0; +} +template <> +inline size_t SizeOfEx<const TEmptyValue>() { + return 0; +} + +template <class TKey, class TValue, class HashFcn, class EqualKey> +struct TSthashIterator { + // Implementation for simple types + typedef const TKey TKeyType; + typedef const TValue TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + explicit TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + TKey& Key() const { + return *(TKey*)Data; + } + TValue& Value() { + return *(TValue*)(Data + sizeof(TKey)); + } + const TValue& Value() const { + return *(const TValue*)(Data + sizeof(TKey)); + } + + template <class AnotherKeyType> + bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const { + return eq(*(TKey*)Data, key); + } + + size_t GetLength() const { + return sizeof(TKey) + SizeOfEx<TValue>(); + } +}; + +template <class Key, class Value, typename size_type_o = ui64> +struct TSthashWriter { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const Key, const Value>&) const { + return sizeof(Key) + SizeOfEx<Value>(); + } + int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const { + stream->Write(&record.first, sizeof(Key)); + stream->Write(&record.second, SizeOfEx<Value>()); + return 0; + } +}; + +// Remember that this simplified implementation makes a copy of `key' in std::make_pair. +// It can also waste some memory on undesired alignment. +template <class Key, typename size_type_o = ui64> +struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { + typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter; + size_t GetRecordSize(const Key& key) const { + return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue())); + } + int SaveRecord(IOutputStream* stream, const Key& key) const { + return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue())); + } +}; + +// we can't save something with pointers without additional tricks + +template <class A, class B, class HashFcn, class EqualKey> +struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; + +template <class A, class B, class HashFcn, class EqualKey> +struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A*, B*, size_type_o> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A*, B, size_type_o> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A, B*, size_type_o> {}; + +template <class T> +inline size_t AlignForChrKey() { + return 4; // TODO: change this (requeres rebuilt of a few existing files) +} + +template <> +inline size_t AlignForChrKey<TEmptyValue>() { + return 1; +} + +template <> +inline size_t AlignForChrKey<const TEmptyValue>() { + return AlignForChrKey<TEmptyValue>(); +} + +// !! note that for char*, physical placement of key and value is swapped +template <class TValue, class HashFcn, class EqualKey> +struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { + typedef const TValue TValueType; + typedef const char* TKeyType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + const char* Key() const { + return Data + SizeOfEx<TValue>(); + } + TValue& Value() { + return *(TValue*)Data; + } + const TValue& Value() const { + return *(const TValue*)Data; + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(Data + SizeOfEx<TValue>(), k); + } + + size_t GetLength() const { + size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>(); + length = AlignUp(length, AlignForChrKey<TValue>()); + return length; + } +}; + +template <class Value, typename size_type_o> +struct TSthashWriter<const char*, Value, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const char*, const Value>& record) const { + size_t length = strlen(record.first) + 1 + SizeOfEx<Value>(); + length = AlignUp(length, AlignForChrKey<Value>()); + return length; + } + int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const { + const char* alignBuffer = "qqqq"; + stream->Write(&record.second, SizeOfEx<Value>()); + size_t length = strlen(record.first) + 1; + stream->Write(record.first, length); + length = AlignUpSpace(length, AlignForChrKey<Value>()); + if (length) + stream->Write(alignBuffer, length); + return 0; + } +}; + +template <class TKey, class HashFcn, class EqualKey> +struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> { + typedef const TKey TKeyType; + typedef const char* TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + TKey& Key() { + return *(TKey*)Data; + } + const char* Value() const { + return Data + sizeof(TKey); + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(*(TKey*)Data, k); + } + + size_t GetLength() const { + size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey); + length = AlignUp(length, (size_t)4); + return length; + } +}; + +template <class Key, typename size_type_o> +struct TSthashWriter<Key, const char*, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const Key, const char*>& record) const { + size_t length = strlen(record.second) + 1 + sizeof(Key); + length = AlignUp(length, (size_t)4); + return length; + } + int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const { + const char* alignBuffer = "qqqq"; + stream->Write(&record.first, sizeof(Key)); + size_t length = strlen(record.second) + 1; + stream->Write(record.second, length); + length = AlignUpSpace(length, (size_t)4); + if (length) + stream->Write(alignBuffer, length); + return 0; + } +}; + +template <class HashFcn, class EqualKey> +struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> { + typedef const char* TKeyType; + typedef const char* TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + const char* Key() const { + return Data; + } + const char* Value() const { + return Data + strlen(Data) + 1; + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(Data, k); + } + + size_t GetLength() const { + size_t length = strlen(Data) + 1; + length += strlen(Data + length) + 1; + return length; + } +}; + +template <typename size_type_o> +struct TSthashWriter<const char*, const char*, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const char*, const char*>& record) const { + size_t size = strlen(record.first) + strlen(record.second) + 2; + return size; + } + int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const { + stream->Write(record.first, strlen(record.first) + 1); + stream->Write(record.second, strlen(record.second) + 1); + return 0; + } +}; diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make new file mode 100644 index 0000000000..8c6d05711c --- /dev/null +++ b/library/cpp/on_disk/st_hash/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + fake.cpp + save_stl.h + static_hash.h + static_hash_map.h + sthash_iterators.h +) + +PEERDIR( + library/cpp/deprecated/mapped_file +) + +END() |