diff options
author | aneporada <aneporada@ydb.tech> | 2023-10-17 09:48:10 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-10-17 10:08:40 +0300 |
commit | 03ea6e03502c722409c7c1eb1fe2741049d5791d (patch) | |
tree | a259e1180538e8a6c31f6151cabfedd6fb05abd6 | |
parent | 86f79c5885de0ada17ad009a0793fa5335101d5b (diff) | |
download | ydb-03ea6e03502c722409c7c1eb1fe2741049d5791d.tar.gz |
Support struct and list as second argument of PERCENTILE
5 files changed, 56 insertions, 6 deletions
diff --git a/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md b/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md index 048cffd767..30cb901673 100644 --- a/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md +++ b/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md @@ -2,19 +2,35 @@ **Сигнатура** ``` -PERCENTILE(Double?, Double)->Double? -PERCENTILE(Interval?, Double)->Interval? +PERCENTILE(T, Double)->T +PERCENTILE(T, Tuple<Double, ...>)->Tuple<T, ...> +PERCENTILE(T, Struct<name1:Double, ...>)->Struct<name1:T, ...> +PERCENTILE(T, List<Double>)->List<T> -MEDIAN(Double? [, Double])->Double? -MEDIAN(Interval? [, Double])->Interval? +MEDIAN(T, [ Double ])->T +MEDIAN(T, [ Tuple<Double, ...> ])->Tuple<T, ...> +MEDIAN(T, [ Struct<name1:Double, ...> ])->Struct<name1:T, ...> +MEDIAN(T, [ List<Double> ])->List<T> ``` -Подсчет процентилей по амортизированной версии алгоритма [TDigest](https://github.com/tdunning/t-digest). `MEDIAN` — алиас для `PERCENTILE(N, 0.5)`. +Подсчет процентилей по амортизированной версии алгоритма [TDigest](https://github.com/tdunning/t-digest). `MEDIAN(x)` без второго аргумента — алиас для `PERCENTILE(x, 0.5)`. +`MEDIAN` с двумя аргументами полностью эквивалентен `PERCENTILE`. +В качестве первого аргумента `PERCENTILE`/`MEDIAN` принимает выражение типа `T`. В качестве типа `T` на данный момент поддерживаются типы `Interval` и `Double` +(а также типы которые допускают неявное приведение к ним - например целочисленные типы). + +В качестве второго аргумента можно использовать либо один `Double` (значение перцентиля), либо сразу несколько значений перцентиля в виде `Tuple`/`Struct`/`List`. + +Значения прецентиля должны лежать в диапазоне от `0.0` до `1.0` включительно. + +**Примеры** ``` yql SELECT MEDIAN(numeric_column), - PERCENTILE(numeric_column, 0.99) + PERCENTILE(numeric_column, 0.99), + PERCENTILE(CAST(string_column as Double), (0.01, 0.5, 0.99)), -- подсчет сразу трех перцентилей + PERCENtILE(numeric_column, AsStruct(0.01 as p01, 0.5 as median, 0.99 as p99)), -- используя структуру, значениям перцентиля можно дать удобные имена + PERCENTILE(numeric_column, ListFromRange(0.00, 1.05, 0.05)), -- подсчет множества перцентилей (от 0.0 до 1.0 включительно с шагом 0.05) FROM my_table; ``` diff --git a/ydb/library/yql/mount/lib/yql/aggregate.yql b/ydb/library/yql/mount/lib/yql/aggregate.yql index a98db5fa4d..b43c91ce40 100644 --- a/ydb/library/yql/mount/lib/yql/aggregate.yql +++ b/ydb/library/yql/mount/lib/yql/aggregate.yql @@ -368,6 +368,8 @@ (let merge (lambda '(one two) (Apply (Udf 'Stat.TDigest_Merge) one two))) (let finish (lambda '(state) (MatchType n 'Tuple (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n)))) + 'Struct (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n)))) + 'List (lambda '() (OrderedMap n (lambda '(n) (Apply get_convert_percentile state n)))) (lambda '() (Apply get_convert_percentile state n))))) (return (AggregationTraits (ListItemType list_type) init update save load merge finish (Null))) )))) diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index f41c5f9dd1..8b56a86c3f 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -2505,6 +2505,13 @@ "uri": "https://storage.yandex-team.ru/get-devtools/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_aggregate-percentile_interval_/sql.yql" } ], + "test_sql2yql.test[aggregate-percentiles_containers]": [ + { + "checksum": "6769c75a2308dcc142c7ff777b855721", + "size": 6031, + "uri": "https://storage.yandex-team.ru/get-devtools/1784117/5fac73a22f194fa5439186fe33ef2e6bf62271e1/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_containers_/sql.yql" + } + ], "test_sql2yql.test[aggregate-percentiles_grouped]": [ { "checksum": "ebde86f573b7ae4e2377b469d7808202", @@ -19403,6 +19410,13 @@ "uri": "https://storage.yandex-team.ru/get-devtools/1599023/4bd7e6892a9762eec433e60bda88f20cddadc74c/resource.tar.gz#test_sql_format.test_aggregate-percentile_interval_/formatted.sql" } ], + "test_sql_format.test[aggregate-percentiles_containers]": [ + { + "checksum": "cc2cb2011f3026190a876c533f069785", + "size": 657, + "uri": "https://storage.yandex-team.ru/get-devtools/1784117/5fac73a22f194fa5439186fe33ef2e6bf62271e1/resource.tar.gz#test_sql_format.test_aggregate-percentiles_containers_/formatted.sql" + } + ], "test_sql_format.test[aggregate-percentiles_grouped]": [ { "checksum": "29b37bc2ae8bf1d5a60f9d4869daee88", diff --git a/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg new file mode 100644 index 0000000000..b0152e1177 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg @@ -0,0 +1,2 @@ +in Input percentiles_input.txt +udf stat_udf diff --git a/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql new file mode 100644 index 0000000000..7bf4633e45 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql @@ -0,0 +1,16 @@ +select + key, + median(val) as med, + percentile(val, AsTuple(0.2, 0.4, 0.6)) as ptuple, + percentile(val, AsStruct(0.2 as p20, 0.4 as p40, 0.6 as p60)) as pstruct, + percentile(val, AsList(0.2, 0.4, 0.6)) as plist, +from (select key, cast(value as int) as val from plato.Input) +group by key +order by key; + +select + median(val) as med, + percentile(val, AsTuple(0.2, 0.4, 0.6)) as ptuple, + percentile(val, AsStruct(0.2 as p20, 0.4 as p40, 0.6 as p60)) as pstruct, + percentile(val, AsList(0.2, 0.4, 0.6)) as plist, +from (select key, cast(value as int) as val from plato.Input) |