aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Common/HashTable/StringHashMap.h
blob: 828a54d357a3e801192c43d31dce6eaebbe58d52 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#pragma once

#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTableAllocator.h>
#include <Common/HashTable/StringHashTable.h>

template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;
    static constexpr bool need_zero_value_storage = false;
    // external
    StringRef getKey() const { return toStringRef(this->value.first); } /// NOLINT
    // internal
    static const Key & getKey(const value_type & value_) { return value_.first; }
};

template <typename TMapped>
struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;
    static constexpr bool need_zero_value_storage = false;
    bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }

    // Zero means unoccupied cells in hash table. Use key with last word = 0 as
    // zero keys, because such keys are unrepresentable (no way to encode length).
    static bool isZero(const StringKey16 & key, const HashTableNoState &) { return key.items[1] == 0; }
    void setZero() { this->value.first.items[1] = 0; }

    // external
    StringRef getKey() const { return toStringRef(this->value.first); } /// NOLINT
    // internal
    static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
};

template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;
    static constexpr bool need_zero_value_storage = false;
    bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }

    // Zero means unoccupied cells in hash table. Use key with last word = 0 as
    // zero keys, because such keys are unrepresentable (no way to encode length).
    static bool isZero(const StringKey24 & key, const HashTableNoState &)
    { return key.c == 0; }
    void setZero() { this->value.first.c = 0; }

    // external
    StringRef getKey() const { return toStringRef(this->value.first); } /// NOLINT
    // internal
    static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
};

template <typename TMapped>
struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;
    static constexpr bool need_zero_value_storage = false;
    // external
    using Base::getKey;
    // internal
    static const StringRef & getKey(const value_type & value_) { return value_.first; }
};

template <typename TMapped, typename Allocator>
struct StringHashMapSubMaps
{
    using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
    using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
    using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
    using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
    using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
};

template <typename TMapped, typename Allocator = HashTableAllocator>
class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
{
public:
    using Key = StringRef;
    using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
    using Self = StringHashMap;
    using LookupResult = typename Base::LookupResult;

    using Base::Base;

    /// Merge every cell's value of current map into the destination map.
    ///  Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
    ///  Each filled cell in current map will invoke func once. If that map doesn't
    ///  have a key equals to the given cell, a new cell gets emplaced into that map,
    ///  and func is invoked with the third argument emplaced set to true. Otherwise
    ///  emplaced is set to false.
    template <typename Func, bool>
    void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
    {
        if (this->m0.hasZero() && that.m0.hasZero())
            func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
        else if (this->m0.hasZero())
        {
            that.m0.setHasZero();
            func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
        }
        this->m1.mergeToViaEmplace(that.m1, func);
        this->m2.mergeToViaEmplace(that.m2, func);
        this->m3.mergeToViaEmplace(that.m3, func);
        this->ms.mergeToViaEmplace(that.ms, func);
    }

    /// Merge every cell's value of current map into the destination map via find.
    ///  Func should have signature void(Mapped & dst, Mapped & src, bool exist).
    ///  Each filled cell in current map will invoke func once. If that map doesn't
    ///  have a key equals to the given cell, func is invoked with the third argument
    ///  exist set to false. Otherwise exist is set to true.
    template <typename Func>
    void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
    {
        if (this->m0.size() && that.m0.size())
            func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
        else if (this->m0.size())
            func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
        this->m1.mergeToViaFind(that.m1, func);
        this->m2.mergeToViaFind(that.m2, func);
        this->m3.mergeToViaFind(that.m3, func);
        this->ms.mergeToViaFind(that.ms, func);
    }

    TMapped & ALWAYS_INLINE operator[](const Key & x)
    {
        LookupResult it;
        bool inserted;
        this->emplace(x, it, inserted);
        if (inserted)
            new (&it->getMapped()) TMapped();

        return it->getMapped();
    }

    template <typename Func>
    void ALWAYS_INLINE forEachValue(Func && func)
    {
        if (this->m0.size())
        {
            func(StringRef{}, this->m0.zeroValue()->getMapped());
        }

        for (auto & v : this->m1)
        {
            func(v.getKey(), v.getMapped());
        }

        for (auto & v : this->m2)
        {
            func(v.getKey(), v.getMapped());
        }

        for (auto & v : this->m3)
        {
            func(v.getKey(), v.getMapped());
        }

        for (auto & v : this->ms)
        {
            func(v.getKey(), v.getMapped());
        }
    }

    template <typename Func>
    void ALWAYS_INLINE forEachMapped(Func && func)
    {
        if (this->m0.size())
            func(this->m0.zeroValue()->getMapped());
        for (auto & v : this->m1)
            func(v.getMapped());
        for (auto & v : this->m2)
            func(v.getMapped());
        for (auto & v : this->m3)
            func(v.getMapped());
        for (auto & v : this->ms)
            func(v.getMapped());
    }
};