aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Common/HashTable/HashTableKeyHolder.h
blob: e2f9d80db531c99f285cbc27227b50bc631557d5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#pragma once

#include <base/StringRef.h>

#include <Common/Arena.h>

/**
  * In some aggregation scenarios, when adding a key to the hash table, we
  * start with a temporary key object, and if it turns out to be a new key,
  * we must make it persistent (e.g. copy to an Arena) and use the resulting
  * persistent object as hash table key. This happens only for StringRef keys,
  * because other key types are stored by value, but StringRef is a pointer-like
  * type: the actual data are stored elsewhere. Even for StringRef, we don't
  * make a persistent copy of the key in each of the following cases:
  * 1) the aggregation method doesn't use temporary keys, so they're persistent
  *    from the start;
  * 2) the key is already present in the hash table;
  * 3) that particular key is stored by value, e.g. a short StringRef key in
  *    StringHashMap.
  *
  * In the past, the caller was responsible for making the key persistent after
  * in was inserted. emplace() returned whether the key is new or not, so the
  * caller only stored new keys (this is case (2) from the above list). However,
  * now we are adding a compound hash table for StringRef keys, so case (3)
  * appears. The decision about persistence now depends on some properties of
  * the key, and the logic of this decision is tied to the particular hash table
  * implementation. This means that the hash table user now doesn't have enough
  * data and logic to make this decision by itself.
  *
  * To support these new requirements, we now manage key persistence by passing
  * a special key holder to emplace(), which has the functions to make the key
  * persistent or to discard it. emplace() then calls these functions at the
  * appropriate moments.
  *
  * This approach has the following benefits:
  * - no extra runtime branches in the caller to make the key persistent.
  * - no additional data is stored in the hash table itself, which is important
  *   when it's used in aggregate function states.
  * - no overhead when the key memory management isn't needed: we just pass the
  *   bare key without any wrapper to emplace(), and the default callbacks do
  *   nothing.
  *
  * This file defines the default key persistence functions, as well as two
  * different key holders and corresponding functions for storing StringRef
  * keys to Arena.
  */

/**
  * Returns the key. Can return the temporary key initially.
  * After the call to keyHolderPersistKey(), must return the persistent key.
  */
template <typename Key>
inline Key & ALWAYS_INLINE keyHolderGetKey(Key && key) { return key; }

/**
  * Make the key persistent. keyHolderGetKey() must return the persistent key
  * after this call.
  */
template <typename Key>
inline void ALWAYS_INLINE keyHolderPersistKey(Key &&) {}

/**
  * Discard the key. Calling keyHolderGetKey() is ill-defined after this.
  */
template <typename Key>
inline void ALWAYS_INLINE keyHolderDiscardKey(Key &&) {}

namespace DB
{

/**
  * ArenaKeyHolder is a key holder for hash tables that serializes a StringRef
  * key to an Arena.
  */
struct ArenaKeyHolder
{
    StringRef key;
    Arena & pool;

};

}

inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder)
{
    return holder.key;
}

inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
{
    // Normally, our hash table shouldn't ask to persist a zero key,
    // but it can happened in the case of clearable hash table (ClearableHashSet, for example).
    // The clearable hash table doesn't use zero storage and
    // distinguishes empty keys by using cell version, not the value itself.
    // So, when an empty StringRef is inserted in ClearableHashSet we'll get here key of zero size.
    // assert(holder.key.size > 0);
    holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
}

inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &)
{
}

namespace DB
{

/** SerializedKeyHolder is a key holder for a StringRef key that is already
  * serialized to an Arena. The key must be the last allocation in this Arena,
  * and is discarded by rolling back the allocation.
  */
struct SerializedKeyHolder
{
    StringRef key;
    Arena & pool;
};

}

inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::SerializedKeyHolder & holder)
{
    return holder.key;
}

inline void ALWAYS_INLINE keyHolderPersistKey(DB::SerializedKeyHolder &)
{
}

inline void ALWAYS_INLINE keyHolderDiscardKey(DB::SerializedKeyHolder & holder)
{
    [[maybe_unused]] void * new_head = holder.pool.rollback(holder.key.size);
    assert(new_head == holder.key.data);
    holder.key.data = nullptr;
    holder.key.size = 0;
}