aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/on_disk/st_hash
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-30 13:26:22 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-30 15:44:45 +0300
commit0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch)
tree291d72dbd7e9865399f668c84d11ed86fb190bbf /library/cpp/on_disk/st_hash
parentcb2c8d75065e5b3c47094067cb4aa407d4813298 (diff)
downloadydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'library/cpp/on_disk/st_hash')
-rw-r--r--library/cpp/on_disk/st_hash/fake.cpp4
-rw-r--r--library/cpp/on_disk/st_hash/save_stl.h84
-rw-r--r--library/cpp/on_disk/st_hash/static_hash.h420
-rw-r--r--library/cpp/on_disk/st_hash/static_hash_map.h59
-rw-r--r--library/cpp/on_disk/st_hash/sthash_iterators.h334
-rw-r--r--library/cpp/on_disk/st_hash/ya.make15
6 files changed, 916 insertions, 0 deletions
diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp
new file mode 100644
index 0000000000..ef5af4d432
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/fake.cpp
@@ -0,0 +1,4 @@
+#include "save_stl.h"
+#include "static_hash.h"
+#include "static_hash_map.h"
+#include "sthash_iterators.h"
diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h
new file mode 100644
index 0000000000..00f8f0e20d
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/save_stl.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <util/generic/hash.h>
+#include <util/system/yassert.h>
+#include <util/stream/output.h>
+
+// Header that save_for_st() writes at the very start of a serialized hash table:
+// the hasher, the key-equality functor and three size_type counters.
+// This structure might be replaced with the sthashtable class.
+template <class HF, class Eq, class size_type>
+struct sthashtable_nvm_sv {
+    sthashtable_nvm_sv() {
+        // If the members do not account for every byte of the object, the compiler
+        // inserted padding; wipe the whole object in that case.
+        // NOTE(review): presumably this keeps padding bytes deterministic when the
+        // struct is written out raw -- confirm.
+        const bool hasPadding =
+            sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type);
+        if (hasPadding) {
+            memset(this, 0, sizeof(sthashtable_nvm_sv));
+        }
+    }
+
+    sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd)
+        : sthashtable_nvm_sv()
+    {
+        // A delegating constructor cannot have other member initializers, so the
+        // fields are assigned here, after the (possible) memset above.
+        hf = phf;
+        eq = peq;
+        num_buckets = pnb;
+        num_elements = pne;
+        data_end_off = pnd;
+    }
+
+    HF hf;                  // hash functor
+    Eq eq;                  // key-equality functor
+    size_type num_buckets;  // number of buckets
+    size_type num_elements; // number of stored elements
+    size_type data_end_off; // offset of the first byte past the last record
+};
+
+/**
+ * Some hack to save both THashMap and sthash.
+ * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method.
+ *
+ * Output layout, in write order:
+ *   1. an sthashtable_nvm_sv header (hasher, key equality, bucket count, element count, end offset);
+ *   2. one TSizeType offset per bucket, followed by one extra end-mark offset;
+ *   3. for each bucket: its records as emitted by ks.SaveRecord(), then (if stHash is given)
+ *      the raw bytes of the same-numbered stHash bucket.
+ * All offsets are counted from the first byte of the header.
+ *
+ * @param stream  destination of the serialized image.
+ * @param ks      supplies TSizeType, GetRecordSize() and SaveRecord() for the stored values.
+ * @param stHash  optional already-serialized table merged in bucket by bucket; it must have the
+ *                same bucket_count() as this table (checked by Y_ASSERT only).
+ * @return always 0.
+ */
+template <class V, class K, class HF, class Ex, class Eq, class A>
+template <class KeySaver>
+inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const {
+ Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count());
+ typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type;
+ sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0};
+ // to do: m.b. use just the size of corresponding object?
+ // Records start right after the header and the (num_buckets + 1) bucket offsets.
+ typename KeySaver::TSizeType cur_off = sizeof(sv_type) +
+ (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType);
+ // The end offset is the start of the data plus the size of every record of this table...
+ sv.data_end_off = cur_off;
+ const_iterator n;
+ for (n = begin(); n != end(); ++n) {
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n));
+ }
+ // ...plus, when merging, the whole record area of stHash (last offset minus first offset).
+ typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr;
+ if (stHash)
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]);
+ //saver.Align(sizeof(char*));
+ stream->Write(&sv, sizeof(sv));
+
+ size_type i;
+ //save vector
+ // Pass 1: write the start offset of every bucket, advancing cur_off by the bucket's size.
+ for (i = 0; i < buckets.size(); ++i) {
+ node* cur = buckets[i];
+ stream->Write(&cur_off, sizeof(cur_off));
+ if (cur) {
+ // Walk the chain until a pointer with its lowest bit set is reached.
+ // NOTE(review): presumably that tagged pointer is THashTable's end-of-chain marker -- confirm in util/generic/hash.h.
+ while (!((uintptr_t)cur & 1)) {
+ cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val));
+ cur = cur->next;
+ }
+ }
+ if (stHash)
+ cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]);
+ }
+ stream->Write(&cur_off, sizeof(cur_off)); // end mark
+ // Pass 2: write the records themselves in the same bucket order as the offsets above.
+ for (i = 0; i < buckets.size(); ++i) {
+ node* cur = buckets[i];
+ if (cur) {
+ while (!((uintptr_t)cur & 1)) {
+ // The status returned by SaveRecord() is not checked.
+ ks.SaveRecord(stream, cur->val);
+ cur = cur->next;
+ }
+ }
+ // Append the raw bytes of bucket i of stHash right after this table's records for bucket i.
+ if (stHash)
+ stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]);
+ }
+ return 0;
+}
diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h
new file mode 100644
index 0000000000..ca7a6ccd36
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash.h
@@ -0,0 +1,420 @@
+#pragma once
+
+#include "save_stl.h"
+#include "sthash_iterators.h"
+
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+#include <util/generic/buffer.h>
+#include <util/generic/cast.h>
+#include <util/generic/yexception.h> // for save/load only
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <utility>
+
+#include <memory>
+#include <algorithm>
+#include <functional>
+
+#include <cstdlib>
+#include <cstddef>
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible'
+#endif
+
+// Serializes `hash` into `stream` with a default-constructed KeySaver.
+// Throws yexception when save_for_st() reports a non-zero status.
+template <class HashType, class KeySaver>
+inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) {
+    KeySaver saver;
+    const bool failed = hash.save_for_st(stream, saver) != 0;
+    if (failed) {
+        ythrow yexception() << "Could not save hash to stream";
+    }
+}
+
+// Serializes `hash` into `stream` using the default TSthashWriter with 64-bit offsets.
+template <class HashType>
+inline void SaveHashToStream(HashType& hash, IOutputStream* stream) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+    SaveHashToStreamEx<HashType, TKeySaver>(hash, stream);
+}
+
+// Opens `fileName` for writing and serializes `hash` into it with the given KeySaver.
+template <class HashType, class KeySaver>
+inline void SaveHashToFileEx(HashType& hash, const char* fileName) {
+    TFileOutput file(fileName);
+    IOutputStream* const sink = &file;
+    SaveHashToStreamEx<HashType, KeySaver>(hash, sink);
+}
+
+// Saves a hash map to `fileName` using the default TSthashWriter with 64-bit offsets.
+template <class HashType>
+inline void SaveHashToFile(HashType& hash, const char* fileName) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+    SaveHashToFileEx<HashType, TKeySaver>(hash, fileName);
+}
+
+// Saves a hash set to `fileName` using TSthashSetWriter with 64-bit offsets.
+template <class HashType>
+inline void SaveHashSetToFile(HashType& hash, const char* fileName) {
+    using TKeySaver = TSthashSetWriter<typename HashType::key_type, ui64>;
+    SaveHashToFileEx<HashType, TKeySaver>(hash, fileName);
+}
+
+// Same as SaveHashToFile, but the image uses 32-bit (ui32) counters and offsets.
+template <class HashType>
+inline void SaveHashToFile32(HashType& hash, const char* fileName) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32>;
+    SaveHashToFileEx<HashType, TKeySaver>(hash, fileName);
+}
+
+// Serializes `hash` into `buffer` with a default-constructed KeySaver.
+// When `stHash` is not null it is passed on to save_for_st() to be merged into the output.
+// Throws yexception when save_for_st() reports a non-zero status.
+template <class HashType, class KeySaver>
+inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) {
+    TBufferOutput sink(buffer);
+    KeySaver saver;
+    const bool failed = hash.save_for_st(&sink, saver, stHash) != 0;
+    if (failed) {
+        ythrow yexception() << "Could not save hash to memory";
+    }
+}
+
+// Serializes `hash` into `buffer` using the default TSthashWriter with 64-bit offsets.
+template <class HashType>
+inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+    SaveHashToBufferEx<HashType, TKeySaver>(hash, buffer);
+}
+
+/**
+ * Some hack to save both THashMap and sthash into one buffer.
+ * THashMap and sthash must have same bucket_count().
+ *
+ * `stHash` is reinterpreted as sthash<int, int, ...> regardless of its real
+ * template arguments before being handed to SaveHashToBufferEx().
+ */
+template <class HashType, class StHashType>
+inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+    using TRawSthash = sthash<int, int, THash<int>, TEqualTo<int>, typename TKeySaver::TSizeType>;
+
+    TRawSthash* const raw = reinterpret_cast<TRawSthash*>(stHash);
+    SaveHashToBufferEx<HashType, TKeySaver>(hash, buffer, raw);
+}
+
+// Same as SaveHashToBuffer, but the image uses 32-bit (ui32) counters and offsets.
+template <class HashType>
+inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) {
+    using TKeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32>;
+    SaveHashToBufferEx<HashType, TKeySaver>(hash, buffer);
+}
+
+/**
+ * Read-only view over a hash table image produced by THashTable::save_for_st().
+ *
+ * The object is never constructed: a pointer to the start of the image is cast to
+ * sthashtable*. The image is laid out as
+ *   [hasher][key_equal][num_buckets][num_elements][data_end_off]   <- this object
+ *   [num_buckets + 1 offsets of type size_type]                    <- buckets()
+ *   [records, bucket after bucket]
+ * and every offset is relative to `this`.
+ *
+ * @tparam Iter         record iterator (a TSthashIterator instantiation) that knows the record layout.
+ * @tparam size_type_f  integer type of the stored counters and offsets.
+ */
+template <class Iter, typename size_type_f = ui64>
+class sthashtable {
+public:
+    typedef typename Iter::TKeyType key_type;
+    typedef typename Iter::TValueType value_type;
+    typedef typename Iter::THasherType hasher;
+    typedef typename Iter::TKeyEqualType key_equal;
+
+    typedef size_type_f size_type;
+    typedef ptrdiff_t difference_type;
+    typedef const value_type* const_pointer;
+    typedef const value_type& const_reference;
+
+    typedef Iter const_iterator;
+
+    const hasher hash_funct() const {
+        return hash;
+    }
+    const key_equal key_eq() const {
+        return equals;
+    }
+
+private:
+    // The data members of this class ARE the image header: their order and types
+    // are part of the serialized format and must not change.
+    const hasher hash;
+    const key_equal equals;
+
+private:
+    // Iterator to the first record of `bucket` (no range check).
+    const_iterator iter_at_bucket(size_type bucket) const {
+        return (const_iterator)(((char*)this + buckets()[bucket]));
+    }
+
+    // Same as iter_at_bucket(), but yields end() for bucket >= num_buckets.
+    const_iterator iter_at_bucket_or_end(size_type bucket) const {
+        if (bucket < num_buckets)
+            return (const_iterator)(((char*)this + buckets()[bucket]));
+        else
+            return end();
+    }
+
+    const size_type num_buckets;
+    const size_type num_elements;
+    const size_type data_end_off;
+
+protected: //shut up gcc warning
+    // we can't construct/destroy this object at all!
+    sthashtable();
+    sthashtable(const sthashtable& ht);
+    ~sthashtable();
+
+public:
+    // const size_type *buckets;
+    // The bucket offset array starts immediately after this object.
+    const size_type* buckets() const {
+        return (size_type*)((char*)this + sizeof(*this));
+    }
+    const size_type buckets(size_type n) const {
+        return buckets()[n];
+    }
+
+    size_type size() const {
+        return num_elements;
+    }
+    size_type max_size() const {
+        return size_type(-1);
+    }
+    bool empty() const {
+        return size() == 0;
+    }
+
+    const_iterator begin() const {
+        return num_buckets ? iter_at_bucket(0) : end();
+    }
+
+    const_iterator end() const {
+        return (const_iterator)(((char*)this + data_end_off));
+    }
+
+public:
+    // Total size of the image in bytes (offset of the end of the record area).
+    size_type size_in_bytes() const {
+        return data_end_off;
+    }
+
+    size_type bucket_count() const {
+        return num_buckets;
+    }
+
+    // Number of records stored in `bucket`; `bucket` must be < bucket_count().
+    size_type elems_in_bucket(size_type bucket) const {
+        size_type result = 0;
+        const_iterator first = iter_at_bucket(bucket);
+        const_iterator last = iter_at_bucket_or_end(bucket + 1);
+
+        for (; first != last; ++first)
+            ++result;
+        return result;
+    }
+
+    // Returns an iterator to the first record whose key equals `key`, or end().
+    template <class TheKey>
+    const_iterator find(const TheKey& key) const {
+        // A table without buckets holds nothing; bail out before bkt_num_key()
+        // would compute `hash % 0`.
+        if (!num_buckets)
+            return end();
+        size_type n = bkt_num_key(key);
+        const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1));
+        for (;
+             first != last && !first.KeyEquals(equals, key);
+             ++first) {
+        }
+        if (first != last)
+            return first;
+        return end();
+    }
+
+    // Number of records whose key equals `key`.
+    size_type count(const key_type& key) const {
+        // Same zero-bucket guard as in find().
+        if (!num_buckets)
+            return 0;
+        const size_type n = bkt_num_key(key);
+        size_type result = 0;
+        const_iterator first = iter_at_bucket(n);
+        const_iterator last = iter_at_bucket_or_end(n + 1);
+
+        for (; first != last; ++first)
+            if (first.KeyEquals(equals, key))
+                ++result;
+        return result;
+    }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const;
+
+private:
+    // Index of the bucket `key` hashes to; requires num_buckets != 0.
+    template <class TheKey>
+    size_type bkt_num_key(const TheKey& key) const {
+        return hash(key) % num_buckets;
+    }
+};
+
+// Returns the range [first, last) of consecutive records in the key's bucket whose
+// keys equal `key`, starting at the first match; (end(), end()) when there is none.
+template <class I, class size_type_f>
+std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const {
+    typedef std::pair<const_iterator, const_iterator> pii;
+    // A table without buckets holds nothing; bail out before bkt_num_key()
+    // would compute `hash % 0`.
+    if (!num_buckets)
+        return pii(end(), end());
+    const size_type n = bkt_num_key(key);
+    const_iterator first = iter_at_bucket(n);
+    const_iterator last = iter_at_bucket_or_end(n + 1);
+
+    for (; first != last; ++first) {
+        if (first.KeyEquals(equals, key)) {
+            // Extend the range while the following records still match.
+            const_iterator cur = first;
+            ++cur;
+            for (; cur != last; ++cur)
+                if (!cur.KeyEquals(equals, key))
+                    return pii(const_iterator(first),
+                               const_iterator(cur));
+            return pii(const_iterator(first),
+                       const_iterator(last));
+        }
+    }
+    return pii(end(), end());
+}
+
+/* end __SGI_STL_HASHTABLE_H */
+
+/**
+ * Read-only map-like facade over a serialized hash table image.
+ * Every call is forwarded to the embedded sthashtable; like it, this object is
+ * never constructed but obtained by casting a pointer to the image.
+ */
+template <class Key, class T, class HashFcn /*= hash<Key>*/,
+          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash {
+private:
+    typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht;
+    ht rep;
+
+public:
+    typedef typename ht::key_type key_type;
+    typedef typename ht::value_type value_type;
+    typedef typename ht::hasher hasher;
+    typedef typename ht::key_equal key_equal;
+    typedef T mapped_type;
+
+    typedef typename ht::size_type size_type;
+    typedef typename ht::difference_type difference_type;
+    typedef typename ht::const_pointer const_pointer;
+    typedef typename ht::const_reference const_reference;
+
+    typedef typename ht::const_iterator const_iterator;
+
+    const hasher hash_funct() const {
+        return rep.hash_funct();
+    }
+    const key_equal key_eq() const {
+        return rep.key_eq();
+    }
+
+public:
+    size_type size() const {
+        return rep.size();
+    }
+    size_type max_size() const {
+        return rep.max_size();
+    }
+    bool empty() const {
+        return rep.empty();
+    }
+
+    const_iterator begin() const {
+        return rep.begin();
+    }
+    const_iterator end() const {
+        return rep.end();
+    }
+
+public:
+    template <class TheKey>
+    const_iterator find(const TheKey& key) const {
+        return rep.find(key);
+    }
+    // True when a record with this key exists.
+    template <class TheKey>
+    bool has(const TheKey& key) const {
+        return rep.find(key) != rep.end();
+    }
+
+    size_type count(const key_type& key) const {
+        return rep.count(key);
+    }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return rep.equal_range(key);
+    }
+
+    size_type size_in_bytes() const {
+        return rep.size_in_bytes();
+    }
+
+    size_type bucket_count() const {
+        return rep.bucket_count();
+    }
+    // The image is read-only, so the table can never be rehashed: the largest
+    // possible bucket count is the current one. (sthashtable has no
+    // max_bucket_count() member to forward to.)
+    size_type max_bucket_count() const {
+        return rep.bucket_count();
+    }
+    size_type elems_in_bucket(size_type n) const {
+        return rep.elems_in_bucket(n);
+    }
+
+    // Raw array of bucket offsets (relative to the start of the image).
+    const size_type* buckets() const {
+        return rep.buckets();
+    }
+    const size_type buckets(size_type n) const {
+        return rep.buckets()[n];
+    }
+};
+
+// Set flavour of sthash: records carry a key only (TEmptyValue as the mapped type).
+template <class Key, class HashFcn,
+          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> {
+    typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base;
+
+public:
+    // Base is a dependent type, so its member types have to be introduced with
+    // `typename`; otherwise the names are taken to be non-types.
+    using typename Base::const_iterator;
+    using typename Base::hasher;
+    using typename Base::key_equal;
+    using typename Base::key_type;
+    using typename Base::size_type;
+    using typename Base::value_type;
+};
+
+/**
+ * Read-only multimap-like facade over a serialized hash table image.
+ * Every call is forwarded to the embedded sthashtable; the object is never
+ * constructed but obtained by casting a pointer to the image.
+ */
+template <class Key, class T, class HashFcn /*= hash<Key>*/,
+          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+class sthash_mm {
+private:
+    typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht;
+    ht rep;
+
+public:
+    typedef typename ht::key_type key_type;
+    typedef typename ht::value_type value_type;
+    typedef typename ht::hasher hasher;
+    typedef typename ht::key_equal key_equal;
+    typedef T mapped_type;
+
+    typedef typename ht::size_type size_type;
+    typedef typename ht::difference_type difference_type;
+    typedef typename ht::const_pointer const_pointer;
+    typedef typename ht::const_reference const_reference;
+
+    typedef typename ht::const_iterator const_iterator;
+
+    const hasher hash_funct() const {
+        return rep.hash_funct();
+    }
+    const key_equal key_eq() const {
+        return rep.key_eq();
+    }
+
+public:
+    size_type size() const {
+        return rep.size();
+    }
+    size_type max_size() const {
+        return rep.max_size();
+    }
+    bool empty() const {
+        return rep.empty();
+    }
+
+    const_iterator begin() const {
+        return rep.begin();
+    }
+    const_iterator end() const {
+        return rep.end();
+    }
+
+    const_iterator find(const key_type& key) const {
+        return rep.find(key);
+    }
+
+    size_type count(const key_type& key) const {
+        return rep.count(key);
+    }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return rep.equal_range(key);
+    }
+
+    size_type bucket_count() const {
+        return rep.bucket_count();
+    }
+    // The image is read-only, so the table can never be rehashed: the largest
+    // possible bucket count is the current one. (sthashtable has no
+    // max_bucket_count() member to forward to.)
+    size_type max_bucket_count() const {
+        return rep.bucket_count();
+    }
+    size_type elems_in_bucket(size_type n) const {
+        return rep.elems_in_bucket(n);
+    }
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h
new file mode 100644
index 0000000000..5dc50abd39
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash_map.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "static_hash.h"
+
+#include <library/cpp/deprecated/mapped_file/mapped_file.h>
+
+#include <util/system/filemap.h>
+
+/**
+ * Owns a memory-mapped file and exposes its contents as a static hash of type SH.
+ *
+ * @tparam SH  the sthash-like type the mapped bytes are interpreted as.
+ */
+template <class SH>
+struct sthash_mapped_c {
+    typedef SH H;
+    typedef typename H::const_iterator const_iterator;
+    TMappedFile M; // the mapping backing `hsh`
+    H* hsh;        // start of the mapped image, or nullptr when nothing valid is mapped
+    sthash_mapped_c()
+        : M()
+        , hsh(nullptr)
+    {
+    }
+    sthash_mapped_c(const char* fname, bool precharge)
+        : M()
+        , hsh(nullptr)
+    {
+        Open(fname, precharge);
+    }
+    /**
+     * Maps `fname` and checks that it looks like a complete image: the file must be
+     * at least sizeof(H) bytes long and its size must equal the end offset stored
+     * in the image header.
+     *
+     * @param fname      path of the file to map.
+     * @param precharge  when true, M.precharge() is called after mapping.
+     * @throws yexception if the size check fails; `hsh` is then reset to nullptr so
+     *         that a damaged mapping is never exposed through operator-> / GetSthash().
+     */
+    void Open(const char* fname, bool precharge) {
+        M.init(fname);
+        if (precharge)
+            M.precharge();
+        H* const candidate = (H*)M.getData();
+        // The size test comes first so the header is only read when it fits in the file.
+        const bool damaged = M.getSize() < sizeof(H) || (ssize_t)M.getSize() != candidate->end().Data - (char*)candidate;
+        if (damaged) {
+            hsh = nullptr;
+            ythrow yexception() << "Could not map hash: " << fname << " is damaged";
+        }
+        hsh = candidate;
+    }
+    H* operator->() {
+        return hsh;
+    }
+    const H* operator->() const {
+        return hsh;
+    }
+    H* GetSthash() {
+        return hsh;
+    }
+    const H* GetSthash() const {
+        return hsh;
+    }
+};
+
+// Convenience wrapper: sthash_mapped_c for sthash<Key, T, Hash> with default
+// key equality and size type.
+template <class Key, class T, class Hash>
+struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> {
+    typedef sthash<Key, T, Hash> H;
+
+    // Leaves the object unmapped; call Open() later.
+    sthash_mapped()
+        : sthash_mapped_c<H>()
+    {
+    }
+
+    // Maps `fname` right away (see sthash_mapped_c::Open).
+    sthash_mapped(const char* fname, bool precharge)
+        : sthash_mapped_c<H>(fname, precharge)
+    {
+    }
+};
diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h
new file mode 100644
index 0000000000..6a9ebdd6c3
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/sthash_iterators.h
@@ -0,0 +1,334 @@
+#pragma once
+
+#include "save_stl.h"
+
+#include <util/system/align.h>
+
+/**
+ This file provides functionality for saving some relatively simple THashMap object
+ to disk in a form that can be mapped read-only (via mmap) at any address.
+ That saved object is accessed via pointer to sthash object (that must have
+ the same parameters as original THashMap object)
+
+ If either key or value are variable-sized (i.e. contain pointers), user must
+ write his own instantiation of TSthashIterator (read iterator for sthash) and
+ TSthashWriter (write iterator for THashMap).
+ An example for <const char *, B> pair is in here.
+**/
+
+// TEmptyValue and SizeOfEx are helpers for sthash_set.
+// TEmptyValue is the placeholder "mapped" type of a set; SizeOfEx reports 0 for it,
+// so records holding it are counted as key bytes only.
+struct TEmptyValue {
+ TEmptyValue() = default;
+};
+
+// Number of bytes a value of type T occupies inside a record: sizeof(T) in the
+// general case.
+template <class T>
+inline size_t SizeOfEx() {
+    const size_t bytes = sizeof(T);
+    return bytes;
+}
+
+// TEmptyValue is counted as zero bytes.
+template <>
+inline size_t SizeOfEx<TEmptyValue>() {
+    const size_t noBytes = 0;
+    return noBytes;
+}
+// The const-qualified TEmptyValue is counted as zero bytes as well.
+template <>
+inline size_t SizeOfEx<const TEmptyValue>() {
+    const size_t noBytes = 0;
+    return noBytes;
+}
+
+// Read iterator for records made of simple fixed-size types: the key bytes are
+// immediately followed by the value bytes (SizeOfEx<TValue>() of them).
+template <class TKey, class TValue, class HashFcn, class EqualKey>
+struct TSthashIterator {
+    // Implementation for simple types
+    using TKeyType = const TKey;
+    using TValueType = const TValue;
+    using TKeyEqualType = EqualKey;
+    using THasherType = HashFcn;
+
+    const char* Data; // first byte of the current record
+
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    explicit TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+
+    // Steps over the current record.
+    void operator++() {
+        Data = Data + GetLength();
+    }
+
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    bool operator!=(const TSthashIterator& that) const {
+        return !(Data == that.Data);
+    }
+
+    // The key sits at the very start of the record.
+    TKey& Key() const {
+        return *reinterpret_cast<TKey*>(const_cast<char*>(Data));
+    }
+    // The value follows the key.
+    TValue& Value() {
+        return *reinterpret_cast<TValue*>(const_cast<char*>(Data + sizeof(TKey)));
+    }
+    const TValue& Value() const {
+        return *reinterpret_cast<const TValue*>(Data + sizeof(TKey));
+    }
+
+    // Compares the stored key with `key` using the supplied equality functor.
+    template <class AnotherKeyType>
+    bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const {
+        return eq(Key(), key);
+    }
+
+    // Size of one record in bytes.
+    size_t GetLength() const {
+        const size_t keyBytes = sizeof(TKey);
+        const size_t valueBytes = SizeOfEx<TValue>();
+        return keyBytes + valueBytes;
+    }
+};
+
+// Record writer for simple fixed-size keys and values: the raw key bytes are
+// followed by SizeOfEx<Value>() raw value bytes.
+template <class Key, class Value, typename size_type_o = ui64>
+struct TSthashWriter {
+    using TSizeType = size_type_o;
+
+    // Every record has the same size, so the argument is not inspected.
+    size_t GetRecordSize(const std::pair<const Key, const Value>&) const {
+        const size_t keyBytes = sizeof(Key);
+        const size_t valueBytes = SizeOfEx<Value>();
+        return keyBytes + valueBytes;
+    }
+
+    // Writes the key, then the value. Always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const {
+        const Key* const keyPtr = &record.first;
+        const Value* const valuePtr = &record.second;
+        stream->Write(keyPtr, sizeof(Key));
+        stream->Write(valuePtr, SizeOfEx<Value>());
+        return 0;
+    }
+};
+
+// Record writer for hash sets, built on the map writer with TEmptyValue as the value.
+// Remember that this simplified implementation makes a copy of `key' in std::make_pair.
+// It can also waste some memory on undesired alignment.
+template <class Key, typename size_type_o = ui64>
+struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> {
+    typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter;
+
+    size_t GetRecordSize(const Key& key) const {
+        const auto asMapRecord = std::make_pair(key, TEmptyValue());
+        return MapWriter::GetRecordSize(asMapRecord);
+    }
+
+    int SaveRecord(IOutputStream* stream, const Key& key) const {
+        const auto asMapRecord = std::make_pair(key, TEmptyValue());
+        return MapWriter::SaveRecord(stream, asMapRecord);
+    }
+};
+
+// we can't save something with pointers without additional tricks
+//
+// The specializations below are intentionally empty: they provide none of the
+// typedefs or methods the tables and savers use, so pointer keys/values are only
+// usable through a more specialized implementation (such as the const char* ones
+// further down in this file).
+
+template <class A, class B, class HashFcn, class EqualKey>
+struct TSthashIterator<A*, B, HashFcn, EqualKey> {};
+
+template <class A, class B, class HashFcn, class EqualKey>
+struct TSthashIterator<A, B*, HashFcn, EqualKey> {};
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A*, B*, size_type_o> {};
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A*, B, size_type_o> {};
+
+template <class A, class B, typename size_type_o>
+struct TSthashWriter<A, B*, size_type_o> {};
+
+// Alignment, in bytes, applied to the length of records that have a C-string key
+// and a value of type T.
+// TODO: change this (requires rebuilding a few existing files)
+template <class T>
+inline size_t AlignForChrKey() {
+    const size_t alignment = 4;
+    return alignment;
+}
+
+// Records with a C-string key and no value are not padded (alignment of 1).
+template <>
+inline size_t AlignForChrKey<TEmptyValue>() {
+    const size_t noAlignment = 1;
+    return noAlignment;
+}
+
+// const TEmptyValue uses the same alignment as TEmptyValue.
+template <>
+inline size_t AlignForChrKey<const TEmptyValue>() {
+    const size_t sameAsNonConst = AlignForChrKey<TEmptyValue>();
+    return sameAsNonConst;
+}
+
+// Read iterator for records with a C-string key.
+// !! note that for char*, physical placement of key and value is swapped:
+// the value bytes come first and the NUL-terminated key follows them.
+template <class TValue, class HashFcn, class EqualKey>
+struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> {
+    using TValueType = const TValue;
+    using TKeyType = const char*;
+    using TKeyEqualType = EqualKey;
+    using THasherType = HashFcn;
+
+    const char* Data; // first byte of the current record
+
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+
+    // Steps over the current record.
+    void operator++() {
+        Data = Data + GetLength();
+    }
+
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    bool operator!=(const TSthashIterator& that) const {
+        return !(Data == that.Data);
+    }
+
+    // The key starts right after the value bytes.
+    const char* Key() const {
+        return Data + SizeOfEx<TValue>();
+    }
+    // The value sits at the very start of the record.
+    TValue& Value() {
+        return *reinterpret_cast<TValue*>(const_cast<char*>(Data));
+    }
+    const TValue& Value() const {
+        return *reinterpret_cast<const TValue*>(Data);
+    }
+
+    // Compares the stored key with `k` using the supplied equality functor.
+    template <class K>
+    bool KeyEquals(const EqualKey& eq, const K& k) const {
+        return eq(Key(), k);
+    }
+
+    // Record size: value bytes + key with its terminating NUL, rounded up to
+    // AlignForChrKey<TValue>().
+    size_t GetLength() const {
+        const size_t valueBytes = SizeOfEx<TValue>();
+        const size_t unaligned = strlen(Data + valueBytes) + 1 + valueBytes;
+        return AlignUp(unaligned, AlignForChrKey<TValue>());
+    }
+};
+
+// Record writer for C-string keys. The value bytes are written first, then the
+// NUL-terminated key, then padding up to AlignForChrKey<Value>().
+template <class Value, typename size_type_o>
+struct TSthashWriter<const char*, Value, size_type_o> {
+    typedef size_type_o TSizeType;
+
+    // Size of the record as it will be written: value + key (with NUL), aligned.
+    size_t GetRecordSize(const std::pair<const char*, const Value>& record) const {
+        size_t length = strlen(record.first) + 1 + SizeOfEx<Value>();
+        length = AlignUp(length, AlignForChrKey<Value>());
+        return length;
+    }
+
+    // Writes one record of exactly GetRecordSize(record) bytes. Always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const {
+        const char* alignBuffer = "qqqq";
+        const size_t valueSize = SizeOfEx<Value>();
+        const size_t keySize = strlen(record.first) + 1;
+        stream->Write(&record.second, valueSize);
+        stream->Write(record.first, keySize);
+        // The padding must be derived from the whole record (value + key), exactly as
+        // GetRecordSize() and the reading iterator do. Padding on the key length alone
+        // wrote a different number of bytes whenever the value size was not a multiple
+        // of the alignment. For values whose size is a multiple of the alignment the
+        // output is unchanged.
+        const size_t padding = AlignUpSpace(valueSize + keySize, AlignForChrKey<Value>());
+        if (padding)
+            stream->Write(alignBuffer, padding);
+        return 0;
+    }
+};
+
+// Read iterator for records with a fixed-size key and a C-string value: the key
+// bytes come first and the NUL-terminated value follows them.
+template <class TKey, class HashFcn, class EqualKey>
+struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> {
+    typedef const TKey TKeyType;
+    typedef const char* TValueType;
+    typedef EqualKey TKeyEqualType;
+    typedef HashFcn THasherType;
+
+    const char* Data; // first byte of the current record
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+    // Steps over the current record.
+    void operator++() {
+        Data += GetLength();
+    }
+
+    bool operator!=(const TSthashIterator& that) const {
+        return Data != that.Data;
+    }
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    // Declared const, like Key() of the other iterator specializations, so that the
+    // key can also be read through a const iterator.
+    TKey& Key() const {
+        return *(TKey*)Data;
+    }
+    // The value string starts right after the key.
+    const char* Value() const {
+        return Data + sizeof(TKey);
+    }
+
+    // Compares the stored key with `k` using the supplied equality functor.
+    template <class K>
+    bool KeyEquals(const EqualKey& eq, const K& k) const {
+        return eq(*(TKey*)Data, k);
+    }
+
+    // Record size: key + value string with its terminating NUL, rounded up to 4 bytes.
+    size_t GetLength() const {
+        size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey);
+        length = AlignUp(length, (size_t)4);
+        return length;
+    }
+};
+
+// Record writer for fixed-size keys with C-string values. The key bytes are written
+// first, then the NUL-terminated value, then padding up to a multiple of 4 bytes.
+template <class Key, typename size_type_o>
+struct TSthashWriter<Key, const char*, size_type_o> {
+    typedef size_type_o TSizeType;
+
+    // Size of the record as it will be written: key + value (with NUL), aligned to 4.
+    size_t GetRecordSize(const std::pair<const Key, const char*>& record) const {
+        size_t length = strlen(record.second) + 1 + sizeof(Key);
+        length = AlignUp(length, (size_t)4);
+        return length;
+    }
+
+    // Writes one record of exactly GetRecordSize(record) bytes. Always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const {
+        const char* alignBuffer = "qqqq";
+        const size_t keySize = sizeof(Key);
+        const size_t valueSize = strlen(record.second) + 1;
+        stream->Write(&record.first, keySize);
+        stream->Write(record.second, valueSize);
+        // The padding must be derived from the whole record (key + value), exactly as
+        // GetRecordSize() and the reading iterator do. Padding on the string length
+        // alone wrote a different number of bytes whenever sizeof(Key) was not a
+        // multiple of 4. For keys whose size is a multiple of 4 the output is unchanged.
+        const size_t padding = AlignUpSpace(keySize + valueSize, (size_t)4);
+        if (padding)
+            stream->Write(alignBuffer, padding);
+        return 0;
+    }
+};
+
+// Read iterator for records where both key and value are C strings: the
+// NUL-terminated key is immediately followed by the NUL-terminated value.
+// No padding is applied.
+template <class HashFcn, class EqualKey>
+struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> {
+    using TKeyType = const char*;
+    using TValueType = const char*;
+    using TKeyEqualType = EqualKey;
+    using THasherType = HashFcn;
+
+    const char* Data; // first byte of the current record
+
+    TSthashIterator()
+        : Data(nullptr)
+    {
+    }
+    TSthashIterator(const char* data)
+        : Data(data)
+    {
+    }
+
+    // Steps over the current record.
+    void operator++() {
+        Data = Data + GetLength();
+    }
+
+    bool operator==(const TSthashIterator& that) const {
+        return Data == that.Data;
+    }
+    bool operator!=(const TSthashIterator& that) const {
+        return !(Data == that.Data);
+    }
+
+    const char* Key() const {
+        return Data;
+    }
+    // The value starts right after the key's terminating NUL.
+    const char* Value() const {
+        const size_t keyBytes = strlen(Data) + 1;
+        return Data + keyBytes;
+    }
+
+    // Compares the stored key with `k` using the supplied equality functor.
+    template <class K>
+    bool KeyEquals(const EqualKey& eq, const K& k) const {
+        return eq(Data, k);
+    }
+
+    // Record size: both strings including their terminating NULs.
+    size_t GetLength() const {
+        const size_t keyBytes = strlen(Data) + 1;
+        const size_t valueBytes = strlen(Data + keyBytes) + 1;
+        return keyBytes + valueBytes;
+    }
+};
+
+// Record writer for C-string keys with C-string values: the key and then the value
+// are written with their terminating NULs, without padding.
+template <typename size_type_o>
+struct TSthashWriter<const char*, const char*, size_type_o> {
+    using TSizeType = size_type_o;
+
+    // Both string lengths plus the two terminating NULs.
+    size_t GetRecordSize(const std::pair<const char*, const char*>& record) const {
+        const size_t keyLen = strlen(record.first);
+        const size_t valueLen = strlen(record.second);
+        return keyLen + valueLen + 2;
+    }
+
+    // Writes the key, then the value. Always returns 0.
+    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const {
+        const char* const key = record.first;
+        const char* const value = record.second;
+        stream->Write(key, strlen(key) + 1);
+        stream->Write(value, strlen(value) + 1);
+        return 0;
+    }
+};
diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make
new file mode 100644
index 0000000000..8c6d05711c
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/ya.make
@@ -0,0 +1,15 @@
+# Library made of headers only; fake.cpp does nothing but include each of them.
+LIBRARY()
+
+SRCS(
+ fake.cpp
+ save_stl.h
+ static_hash.h
+ static_hash_map.h
+ sthash_iterators.h
+)
+
+# static_hash_map.h includes library/cpp/deprecated/mapped_file/mapped_file.h
+PEERDIR(
+ library/cpp/deprecated/mapped_file
+)
+
+END()