// Path: contrib/clickhouse/src/AggregateFunctions/UniqVariadicHash.h
// Blob: 840380e7f0f2ea15bc921bc2fe72340892acc6aa
#pragma once

#include <city.h>
#include <Core/Defines.h>
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/IDataType.h>


namespace DB
{
struct Settings;

/** Hashes a set of arguments to the aggregate function
  *  to calculate the number of unique values
  *  and adds them to the set.
  *
  * Four options (2 x 2)
  *
  * - for approximate calculation, uses a non-cryptographic 64-bit hash function;
  * - for an accurate calculation, uses a cryptographic 128-bit hash function;
  *
  * - for several arguments passed in the usual way;
  * - for one argument-tuple.
  *
  * Template parameters:
  * - exact: false selects the CityHash64-based specializations (apply returns UInt64),
  *          true selects the SipHash-based specializations (apply returns UInt128);
  * - for_tuple: false reads the arguments from columns[0 .. num_args),
  *          true reads them from the element columns of the ColumnTuple in columns[0].
  *
  * The primary template is only declared here; the four explicit specializations
  *  below in this header provide the implementations.
  */

template <bool exact, bool for_tuple>
struct UniqVariadicHash;


/// Judging by its name, reports whether all of the given argument types are contiguous in memory
///  (only the declaration is visible in this header, so confirm against the definition).
/// If some arguments are not contiguous, the simple hash function cannot be used,
///  because it requires the method IColumn::getDataAt to work.
/// Note that a single tuple argument is treated in the same way as multiple arguments.
bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types);


/// Approximate (64-bit) variant for arguments passed as separate columns.
template <>
struct UniqVariadicHash<false, false>
{
    /// Builds one 64-bit hash for row `row_num` out of `num_args` columns.
    /// Each value is taken through IColumn::getDataAt and hashed with CityHash64;
    ///  the per-column hashes are chained with Hash128to64 in argument order.
    /// The first column is read unconditionally, so `num_args` is expected to be at least 1.
    static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
    {
        /// The hash of the first argument seeds the accumulator.
        const StringRef first_value = columns[0]->getDataAt(row_num);
        UInt64 result = CityHash_v1_0_2::CityHash64(first_value.data, first_value.size);

        /// Every further argument is hashed on its own and then mixed with the accumulator.
        for (size_t arg = 1; arg < num_args; ++arg)
        {
            const StringRef value = columns[arg]->getDataAt(row_num);
            const auto value_hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
            result = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(value_hash, result));
        }

        return result;
    }
};

/// Approximate (64-bit) variant for a single tuple argument.
template <>
struct UniqVariadicHash<false, true>
{
    /// Builds one 64-bit hash for row `row_num` out of the first `num_args` element columns
    ///  of the ColumnTuple stored in `columns[0]`.
    /// Each value is taken through IColumn::getDataAt and hashed with CityHash64;
    ///  the per-element hashes are chained with Hash128to64 in element order.
    /// The first element column is read unconditionally, so `num_args` is expected to be at least 1.
    static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
    {
        const auto * tuple = assert_cast<const ColumnTuple *>(columns[0]);
        const auto & nested_columns = tuple->getColumns();
        const auto * elements = nested_columns.data();

        /// The hash of the first tuple element seeds the accumulator.
        const StringRef first_value = elements[0].get()->getDataAt(row_num);
        UInt64 result = CityHash_v1_0_2::CityHash64(first_value.data, first_value.size);

        /// Every further element is hashed on its own and then mixed with the accumulator.
        for (size_t arg = 1; arg < num_args; ++arg)
        {
            const StringRef value = elements[arg].get()->getDataAt(row_num);
            const auto value_hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
            result = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(value_hash, result));
        }

        return result;
    }
};

/// Exact (128-bit) variant for arguments passed as separate columns.
template <>
struct UniqVariadicHash<true, false>
{
    /// Feeds the value at `row_num` of each of the `num_args` columns, in argument order,
    ///  into a single SipHash state and returns its 128-bit result.
    static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
    {
        SipHash state;

        for (size_t arg = 0; arg < num_args; ++arg)
            columns[arg]->updateHashWithValue(row_num, state);

        return state.get128();
    }
};

/// Exact (128-bit) variant for a single tuple argument.
template <>
struct UniqVariadicHash<true, true>
{
    /// Feeds the value at `row_num` of the first `num_args` element columns of the ColumnTuple
    ///  stored in `columns[0]`, in element order, into a single SipHash state
    ///  and returns its 128-bit result.
    static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
    {
        const auto * tuple = assert_cast<const ColumnTuple *>(columns[0]);
        const auto & nested_columns = tuple->getColumns();
        const auto * elements = nested_columns.data();

        SipHash state;

        for (size_t arg = 0; arg < num_args; ++arg)
            elements[arg]->updateHashWithValue(row_num, state);

        return state.get128();
    }
};

}