aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/on_disk/st_hash/sthash_iterators.h
blob: 6a9ebdd6c3f215600dc828a27305a63fb1d2aa4a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#pragma once

#include "save_stl.h"

#include <util/system/align.h>

/**
    This file provides functionality for saving a relatively simple THashMap object
    to disk in a form that can be mapped read-only (via mmap) at any address.
    The saved object is accessed via a pointer to an sthash object (which must have
    the same parameters as the original THashMap object).

    If either the key or the value is variable-sized (i.e. contains pointers), the user
    must write their own specialization of TSthashIterator (read iterator for sthash)
    and TSthashWriter (write iterator for THashMap).
    An example for a <const char*, B> pair is provided in this file.
**/

// Helpers for sthash_set: TEmptyValue is the placeholder "value" type, and
// SizeOfEx<T>() is the number of bytes a T contributes to a saved record.
struct TEmptyValue {
    TEmptyValue() = default;
};

// Record footprint of T: sizeof(T) unless one of the specializations below applies.
template <class T>
inline size_t SizeOfEx() {
    const size_t bytes = sizeof(T);
    return bytes;
}

// The placeholder value contributes no bytes to a record.
template <>
inline size_t SizeOfEx<TEmptyValue>() {
    return size_t(0);
}

// Same for the const-qualified placeholder.
template <>
inline size_t SizeOfEx<const TEmptyValue>() {
    return size_t(0);
}

// Generic read iterator for fixed-size ("simple") key and value types.
// A record is the raw bytes of the key immediately followed by the raw bytes
// of the value; Data points at the first byte of the current record.
template <class TKey, class TValue, class HashFcn, class EqualKey>
struct TSthashIterator {
    using TKeyType = const TKey;
    using TValueType = const TValue;
    using TKeyEqualType = EqualKey;
    using THasherType = HashFcn;

    const char* Data;

    TSthashIterator()
        : Data(nullptr)
    {
    }
    explicit TSthashIterator(const char* data)
        : Data(data)
    {
    }

    // Size of one record in bytes (key bytes plus SizeOfEx of the value).
    size_t GetLength() const {
        return SizeOfEx<TValue>() + sizeof(TKey);
    }

    // Advances to the record that starts right after the current one.
    void operator++() {
        Data = Data + GetLength();
    }

    // Iterators compare equal when they point at the same record start.
    bool operator==(const TSthashIterator& that) const {
        return that.Data == Data;
    }
    bool operator!=(const TSthashIterator& that) const {
        return !(that.Data == Data);
    }

    // The key is stored at the very beginning of the record.
    TKey& Key() const {
        return *(TKey*)Data;
    }

    // The value is stored right after the key.
    TValue& Value() {
        const char* valueBytes = Data + sizeof(TKey);
        return *(TValue*)valueBytes;
    }
    const TValue& Value() const {
        const char* valueBytes = Data + sizeof(TKey);
        return *(const TValue*)valueBytes;
    }

    // Compares the stored key with `key` using the supplied equality functor.
    template <class AnotherKeyType>
    bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const {
        return eq(Key(), key);
    }
};

// Generic record writer for fixed-size key and value types: emits the raw
// bytes of the key followed by SizeOfEx<Value>() raw bytes of the value.
template <class Key, class Value, typename size_type_o = ui64>
struct TSthashWriter {
    using TSizeType = size_type_o;

    // Number of bytes SaveRecord() writes; independent of the record contents.
    size_t GetRecordSize(const std::pair<const Key, const Value>&) const {
        const size_t keyBytes = sizeof(Key);
        const size_t valueBytes = SizeOfEx<Value>();
        return keyBytes + valueBytes;
    }

    // Writes the key and then the value to `stream`; always returns 0.
    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const {
        const Key& key = record.first;
        const Value& value = record.second;
        stream->Write(&key, sizeof(Key));
        stream->Write(&value, SizeOfEx<Value>());
        return 0;
    }
};

// Remember that this simplified implementation makes a copy of `key' in std::make_pair.
// It can also waste some memory on undesired alignment.
// Set flavour of TSthashWriter: every key is paired with a TEmptyValue and the
// work is forwarded to the corresponding map writer.
template <class Key, typename size_type_o = ui64>
struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> {
    using MapWriter = TSthashWriter<Key, TEmptyValue, size_type_o>;

    // Record size the map writer reports for (key, empty value).
    size_t GetRecordSize(const Key& key) const {
        const MapWriter& mapWriter = *this;
        return mapWriter.GetRecordSize(std::make_pair(key, TEmptyValue()));
    }

    // Saves (key, empty value) through the map writer and returns its result.
    int SaveRecord(IOutputStream* stream, const Key& key) const {
        const MapWriter& mapWriter = *this;
        return mapWriter.SaveRecord(stream, std::make_pair(key, TEmptyValue()));
    }
};

// we can't save something with pointers without additional tricks

// Intentionally empty stubs for raw-pointer keys/values: they have no members,
// so any attempt to iterate over or write such records fails to compile.
// Note: the patterns below match plain pointer types (A*, B*) only; a
// const-qualified pointer type such as `A* const` is not matched by `A*`.

template <class A, class B, class HashFcn, class EqualKey>
struct TSthashIterator<A*, B, HashFcn, EqualKey> {};

template <class A, class B, class HashFcn, class EqualKey>
struct TSthashIterator<A, B*, HashFcn, EqualKey> {};

// Pointer key AND pointer value: without this stub the two specializations
// above would be ambiguous for such a pair. Mirrors TSthashWriter<A*, B*>.
template <class A, class B, class HashFcn, class EqualKey>
struct TSthashIterator<A*, B*, HashFcn, EqualKey> {};

template <class A, class B, typename size_type_o>
struct TSthashWriter<A*, B*, size_type_o> {};

template <class A, class B, typename size_type_o>
struct TSthashWriter<A*, B, size_type_o> {};

template <class A, class B, typename size_type_o>
struct TSthashWriter<A, B*, size_type_o> {};

// Granularity (in bytes) to which the length of a record with a C-string key
// and a value of type T is rounded up.
template <class T>
inline size_t AlignForChrKey() {
    // TODO: change this (requires rebuilding a few existing files)
    const size_t defaultAlign = 4;
    return defaultAlign;
}

// Records with the empty placeholder value are not padded at all.
template <>
inline size_t AlignForChrKey<TEmptyValue>() {
    const size_t noAlign = 1;
    return noAlign;
}

// The const-qualified placeholder uses the same granularity as the plain one.
template <>
inline size_t AlignForChrKey<const TEmptyValue>() {
    return AlignForChrKey<TEmptyValue>();
}

// !! note that for char*, physical placement of key and value is swapped
// Read iterator for records with a C-string key and a fixed-size value.
// Record layout: SizeOfEx<TValue>() bytes of value first, then the
// NUL-terminated key; the record length is rounded up to AlignForChrKey<TValue>().
template <class TValue, class HashFcn, class EqualKey>
struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> {
    using TValueType = const TValue;
    using TKeyType = const char*;
    using TKeyEqualType = EqualKey;
    using THasherType = HashFcn;

    const char* Data;

    TSthashIterator()
        : Data(nullptr)
    {
    }
    TSthashIterator(const char* data)
        : Data(data)
    {
    }

    // The key string starts right after the value bytes.
    const char* Key() const {
        return Data + SizeOfEx<TValue>();
    }

    // The value occupies the first bytes of the record.
    TValue& Value() {
        return *(TValue*)Data;
    }
    const TValue& Value() const {
        return *(const TValue*)Data;
    }

    // Compares the stored key string with `k` using the supplied equality functor.
    template <class K>
    bool KeyEquals(const EqualKey& eq, const K& k) const {
        return eq(Key(), k);
    }

    // Record size: value bytes + key string with its terminator, rounded up.
    size_t GetLength() const {
        const size_t valueBytes = SizeOfEx<TValue>();
        const size_t keyBytes = strlen(Data + valueBytes) + 1;
        return AlignUp(keyBytes + valueBytes, AlignForChrKey<TValue>());
    }

    // Advances to the record that starts right after the current one.
    void operator++() {
        Data = Data + GetLength();
    }

    // Iterators compare equal when they point at the same record start.
    bool operator==(const TSthashIterator& that) const {
        return that.Data == Data;
    }
    bool operator!=(const TSthashIterator& that) const {
        return !(that.Data == Data);
    }
};

// Record writer for a C-string key and a fixed-size value.
// Record layout: SizeOfEx<Value>() bytes of value, then the NUL-terminated key,
// then 'q' filler bytes up to a multiple of AlignForChrKey<Value>().
template <class Value, typename size_type_o>
struct TSthashWriter<const char*, Value, size_type_o> {
    typedef size_type_o TSizeType;

    // Total size of the saved record: value bytes + key with its terminator,
    // rounded up to the record alignment.
    size_t GetRecordSize(const std::pair<const char*, const Value>& record) const {
        size_t length = strlen(record.first) + 1 + SizeOfEx<Value>();
        length = AlignUp(length, AlignForChrKey<Value>());
        return length;
    }

    // Writes exactly GetRecordSize(record) bytes to `stream`; always returns 0.
    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const {
        const char* alignBuffer = "qqqq";
        const size_t valueLength = SizeOfEx<Value>();
        stream->Write(&record.second, valueLength);
        const size_t keyLength = strlen(record.first) + 1;
        stream->Write(record.first, keyLength);
        // The padding must be derived from everything written so far (value + key),
        // not from the key alone; otherwise the record is longer or shorter than
        // GetRecordSize() reports whenever the value size is not a multiple of
        // the alignment.
        const size_t padding = AlignUpSpace(valueLength + keyLength, AlignForChrKey<Value>());
        if (padding)
            stream->Write(alignBuffer, padding);
        return 0;
    }
};

// Read iterator for records with a fixed-size key and a C-string value.
// Record layout: raw bytes of the key, then the NUL-terminated value; the
// record length is rounded up to a multiple of 4 bytes.
template <class TKey, class HashFcn, class EqualKey>
struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> {
    typedef const TKey TKeyType;
    typedef const char* TValueType;
    typedef EqualKey TKeyEqualType;
    typedef HashFcn THasherType;

    // First byte of the current record.
    const char* Data;
    TSthashIterator()
        : Data(nullptr)
    {
    }
    TSthashIterator(const char* data)
        : Data(data)
    {
    }
    // Advances to the record that starts right after the current one.
    void operator++() {
        Data += GetLength();
    }

    bool operator!=(const TSthashIterator& that) const {
        return Data != that.Data;
    }
    bool operator==(const TSthashIterator& that) const {
        return Data == that.Data;
    }
    // The key is stored at the very beginning of the record.
    // const-qualified, like Key() in the other TSthashIterator variants, so the
    // key can also be read through a const iterator.
    TKey& Key() const {
        return *(TKey*)Data;
    }
    // The value string starts right after the key bytes.
    const char* Value() const {
        return Data + sizeof(TKey);
    }

    // Compares the stored key with `k` using the supplied equality functor.
    template <class K>
    bool KeyEquals(const EqualKey& eq, const K& k) const {
        return eq(*(TKey*)Data, k);
    }

    // Record size: key bytes + value string with its terminator, rounded up to 4.
    size_t GetLength() const {
        size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey);
        length = AlignUp(length, (size_t)4);
        return length;
    }
};

// Record writer for a fixed-size key and a C-string value.
// Record layout: raw bytes of the key, then the NUL-terminated value, then
// 'q' filler bytes up to a multiple of 4 bytes.
template <class Key, typename size_type_o>
struct TSthashWriter<Key, const char*, size_type_o> {
    typedef size_type_o TSizeType;

    // Total size of the saved record: key bytes + value with its terminator,
    // rounded up to a multiple of 4.
    size_t GetRecordSize(const std::pair<const Key, const char*>& record) const {
        size_t length = strlen(record.second) + 1 + sizeof(Key);
        length = AlignUp(length, (size_t)4);
        return length;
    }

    // Writes exactly GetRecordSize(record) bytes to `stream`; always returns 0.
    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const {
        const char* alignBuffer = "qqqq";
        stream->Write(&record.first, sizeof(Key));
        const size_t valueLength = strlen(record.second) + 1;
        stream->Write(record.second, valueLength);
        // The padding must be derived from everything written so far (key + value),
        // not from the value alone; otherwise the record size differs from
        // GetRecordSize() whenever sizeof(Key) is not a multiple of 4.
        const size_t padding = AlignUpSpace(sizeof(Key) + valueLength, (size_t)4);
        if (padding)
            stream->Write(alignBuffer, padding);
        return 0;
    }
};

// Read iterator for records where both key and value are C strings.
// Record layout: NUL-terminated key immediately followed by the NUL-terminated
// value; no padding is applied.
template <class HashFcn, class EqualKey>
struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> {
    using TKeyType = const char*;
    using TValueType = const char*;
    using TKeyEqualType = EqualKey;
    using THasherType = HashFcn;

    const char* Data;

    TSthashIterator()
        : Data(nullptr)
    {
    }
    TSthashIterator(const char* data)
        : Data(data)
    {
    }

    // The key string is the beginning of the record.
    const char* Key() const {
        return Data;
    }

    // The value string starts right after the key's terminator.
    const char* Value() const {
        const size_t keyBytes = strlen(Data) + 1;
        return Data + keyBytes;
    }

    // Compares the stored key string with `k` using the supplied equality functor.
    template <class K>
    bool KeyEquals(const EqualKey& eq, const K& k) const {
        return eq(Key(), k);
    }

    // Record size: both strings including their terminators.
    size_t GetLength() const {
        const size_t keyBytes = strlen(Data) + 1;
        const size_t valueBytes = strlen(Data + keyBytes) + 1;
        return keyBytes + valueBytes;
    }

    // Advances to the record that starts right after the current one.
    void operator++() {
        Data = Data + GetLength();
    }

    // Iterators compare equal when they point at the same record start.
    bool operator==(const TSthashIterator& that) const {
        return that.Data == Data;
    }
    bool operator!=(const TSthashIterator& that) const {
        return !(that.Data == Data);
    }
};

// Record writer for C-string key and C-string value: the two strings are
// written back to back, each with its terminating NUL, without padding.
template <typename size_type_o>
struct TSthashWriter<const char*, const char*, size_type_o> {
    using TSizeType = size_type_o;

    // Number of bytes SaveRecord() writes for `record`.
    size_t GetRecordSize(const std::pair<const char*, const char*>& record) const {
        const size_t keyBytes = strlen(record.first) + 1;
        const size_t valueBytes = strlen(record.second) + 1;
        return keyBytes + valueBytes;
    }

    // Writes the key and then the value to `stream`; always returns 0.
    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const {
        const char* const key = record.first;
        const char* const value = record.second;
        stream->Write(key, strlen(key) + 1);
        stream->Write(value, strlen(value) + 1);
        return 0;
    }
};