summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
diff options
context:
space:
mode:
authorvitalyisaev <[email protected]>2023-11-14 09:58:56 +0300
committervitalyisaev <[email protected]>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp')
-rw-r--r--contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp98
1 files changed, 98 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
new file mode 100644
index 00000000000..849f81279e7
--- /dev/null
+++ b/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
@@ -0,0 +1,98 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionTTest.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include <AggregateFunctions/Moments.h>
+
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+namespace DB
+{
+struct Settings;
+
+namespace
+{
+
+/// State and final computation for Welch's (unequal variances) two-sample t-test.
+///
+/// The members nx, ny, x1, y1, x2, y2 and the helpers getMeanX(), getMeanY(),
+/// getStandardError() are not declared here; they come from TTestMoments<Float64>.
+/// NOTE(review): from the variance formulas below, x1/y1 look like sums and x2/y2 like
+/// sums of squares of the two samples - confirm against TTestMoments.
+struct WelchTTestData : public TTestMoments<Float64>
+{
+    static constexpr auto name = "welchTTest";
+
+    /// Both samples must contain more than one observation, because the
+    /// variance estimates below divide by (nx - 1) and (ny - 1).
+    bool hasEnoughObservations() const
+    {
+        return nx > 1 && ny > 1;
+    }
+
+    /// Degrees of freedom of the approximating t-distribution:
+    ///
+    ///     (sx2 / nx + sy2 / ny)^2 / (sx2^2 / (nx^2 * (nx - 1)) + sy2^2 / (ny^2 * (ny - 1)))
+    ///
+    /// where sx2 and sy2 are the variance estimates with (n - 1) in the denominator.
+    ///
+    /// Throws Exception with ErrorCodes::BAD_ARGUMENTS when the value is NaN, infinite or not positive.
+    Float64 getDegreesOfFreedom() const
+    {
+        Float64 mean_x = getMeanX();
+        Float64 mean_y = getMeanY();
+
+        /// Expanded form of sum((v - mean)^2) / (n - 1).
+        Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1);
+        Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1);
+
+        Float64 numerator_sqrt = sx2 / nx + sy2 / ny;
+        Float64 numerator = numerator_sqrt * numerator_sqrt;
+
+        Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1));
+        Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1));
+
+        auto result = numerator / (denominator_x + denominator_y);
+
+        /// !isfinite covers both NaN and +-inf.
+        if (!std::isfinite(result) || result <= 0)
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                /// Adjacent literals instead of a backslash continuation: a continuation inside
+                /// the literal would embed the next line's indentation into the message.
+                "Cannot calculate p_value, because the t-distribution "
+                "has inappropriate value of degrees of freedom (={}). It should be > 0", result);
+
+        return result;
+    }
+
+    /// Returns {t-statistic, two-sided p-value}.
+    /// If the t-statistic is not finite, both values are NaN and the
+    /// degrees of freedom are never computed (so nothing is thrown).
+    std::tuple<Float64, Float64> getResult() const
+    {
+        Float64 mean_x = getMeanX();
+        Float64 mean_y = getMeanY();
+
+        /// t-statistic
+        Float64 se = getStandardError();
+        Float64 t_stat = (mean_x - mean_y) / se;
+
+        if (unlikely(!std::isfinite(t_stat)))
+            return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
+
+        auto students_t_distribution = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom());
+
+        /// Two-sided p-value: twice the lower tail at -|t|.
+        Float64 pvalue = 2 * boost::math::cdf<Float64>(students_t_distribution, -std::abs(t_stat));
+
+        return {t_stat, pvalue};
+    }
+};
+
+/// Creator callback for the aggregate function: validates the call and builds the function object.
+///
+/// Checks are performed in this order:
+///  1. assertBinary(name, argument_types) - presumably enforces exactly two arguments; see FactoryHelpers.h;
+///  2. at most one parameter is accepted, otherwise NUMBER_OF_ARGUMENTS_DOESNT_MATCH is thrown;
+///  3. both argument types must satisfy isNumber(), otherwise BAD_ARGUMENTS is thrown.
+/// The Settings pointer is ignored.
+AggregateFunctionPtr createAggregateFunctionWelchTTest(
+    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
+{
+    using Function = AggregateFunctionTTest<WelchTTestData>;
+
+    assertBinary(name, argument_types);
+
+    const bool too_many_parameters = parameters.size() > 1;
+    if (too_many_parameters)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires zero or one parameter.", name);
+
+    const bool both_numeric = isNumber(argument_types[0]) && isNumber(argument_types[1]);
+    if (!both_numeric)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical types", name);
+
+    return std::make_shared<Function>(argument_types, parameters);
+}
+
+}
+
+/// Registers the creator defined in this file with the given factory under the name "welchTTest".
+void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
+{
+    constexpr auto registered_name = "welchTTest";
+    factory.registerFunction(registered_name, createAggregateFunctionWelchTTest);
+}
+
+}