aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionTheilsU.cpp
blob: 96772a0daa8677580d3556971a352b172cd7d04e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <cmath>
#include <limits>
#include <memory>


namespace DB
{

namespace
{

/// State and final formula for the `theilsU` aggregate function.
///
/// All counters used here (`count`, `count_a`, `count_b`, `count_ab`) are inherited from
/// CrossTabData; this struct only supplies the function name and the result computation.
struct TheilsUData : CrossTabData
{
    /// Name under which the function is registered.
    static const char * getName()
    {
        return "theilsU";
    }

    /// Computes the statistic from the accumulated counters:
    ///
    ///     h_a = sum over count_a  of (n_a / count)  * ln(n_a / count)
    ///     dep = sum over count_ab of (n_ab / count) * ln(n_ab / n_b)
    ///     result = (dep - h_a) / h_a
    ///
    /// where n_b is looked up in `count_b` by the second component of the `count_ab` key.
    /// NOTE(review): presumably `count` is the number of accumulated rows and the maps hold
    /// per-value / per-pair occurrence counts — confirm against CrossTabData.
    ///
    /// Returns NaN when the statistic is undefined: when `count < 2`, or when `h_a` is zero
    /// (the normalising term vanishes, so the ratio would be a division by zero).
    Float64 getResult() const
    {
        constexpr Float64 undefined = std::numeric_limits<Float64>::quiet_NaN();

        if (count < 2)
            return undefined;

        /// Convert once instead of on every division inside the loops.
        const Float64 total = static_cast<Float64>(count);

        Float64 h_a = 0.0;
        for (const auto & [key, value] : count_a)
        {
            const Float64 share = static_cast<Float64>(value) / total;
            h_a += share * std::log(share);
        }

        /// h_a is exactly zero only when every term is zero (each share equals 1).
        /// Dividing by it would silently produce NaN/inf, so report "undefined" explicitly
        /// and skip the second pass.
        if (h_a == 0.0)
            return undefined;

        Float64 dep = 0.0;
        for (const auto & [key, value] : count_ab)
        {
            const Float64 value_ab = static_cast<Float64>(value);
            /// `at` is used on purpose: a pair key whose second component is missing
            /// from `count_b` indicates a corrupted state and must not be ignored.
            const Float64 value_b = static_cast<Float64>(count_b.at(key.items[1]));

            dep += (value_ab / total) * std::log(value_ab / value_b);
        }

        return (dep - h_a) / h_a;
    }
};

}

/// Registers the `theilsU` aggregate function in the given factory.
void registerAggregateFunctionTheilsU(AggregateFunctionFactory & factory)
{
    /// Creator callback: validates the call via assertBinary / assertNoParameters
    /// (in that order) and then builds the cross-tab aggregate specialised for TheilsUData.
    /// The trailing Settings pointer is not used.
    auto create_theils_u = [](const std::string & function_name, const DataTypes & arguments, const Array & params, const Settings *)
    {
        assertBinary(function_name, arguments);
        assertNoParameters(function_name, params);
        return std::make_shared<AggregateFunctionCrossTab<TheilsUData>>(arguments);
    };

    factory.registerFunction(TheilsUData::getName(), create_theils_u);
}

}