aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraneporada <aneporada@ydb.tech>2023-10-17 09:48:10 +0300
committeraneporada <aneporada@ydb.tech>2023-10-17 10:08:40 +0300
commit03ea6e03502c722409c7c1eb1fe2741049d5791d (patch)
treea259e1180538e8a6c31f6151cabfedd6fb05abd6
parent86f79c5885de0ada17ad009a0793fa5335101d5b (diff)
downloadydb-03ea6e03502c722409c7c1eb1fe2741049d5791d.tar.gz
Support struct and list as second argument of PERCENTILE
-rw-r--r--ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md28
-rw-r--r--ydb/library/yql/mount/lib/yql/aggregate.yql2
-rw-r--r--ydb/library/yql/tests/sql/sql2yql/canondata/result.json14
-rw-r--r--ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg2
-rw-r--r--ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql16
5 files changed, 56 insertions, 6 deletions
diff --git a/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md b/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md
index 048cffd767..30cb901673 100644
--- a/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md
+++ b/ydb/docs/ru/core/yql/reference/yql-core/builtins/_includes/aggregation/percentile_median.md
@@ -2,19 +2,35 @@
**Сигнатура**
```
-PERCENTILE(Double?, Double)->Double?
-PERCENTILE(Interval?, Double)->Interval?
+PERCENTILE(T, Double)->T
+PERCENTILE(T, Tuple<Double, ...>)->Tuple<T, ...>
+PERCENTILE(T, Struct<name1:Double, ...>)->Struct<name1:T, ...>
+PERCENTILE(T, List<Double>)->List<T>
-MEDIAN(Double? [, Double])->Double?
-MEDIAN(Interval? [, Double])->Interval?
+MEDIAN(T, [ Double ])->T
+MEDIAN(T, [ Tuple<Double, ...> ])->Tuple<T, ...>
+MEDIAN(T, [ Struct<name1:Double, ...> ])->Struct<name1:T, ...>
+MEDIAN(T, [ List<Double> ])->List<T>
```
-Подсчет процентилей по амортизированной версии алгоритма [TDigest](https://github.com/tdunning/t-digest). `MEDIAN` — алиас для `PERCENTILE(N, 0.5)`.
+Подсчет процентилей по амортизированной версии алгоритма [TDigest](https://github.com/tdunning/t-digest). `MEDIAN(x)` без второго аргумента — алиас для `PERCENTILE(x, 0.5)`.
+`MEDIAN` с двумя аргументами полностью эквивалентен `PERCENTILE`.
+В качестве первого аргумента `PERCENTILE`/`MEDIAN` принимает выражение типа `T`. В качестве типа `T` на данный момент поддерживаются типы `Interval` и `Double`
+(а также типы которые допускают неявное приведение к ним - например целочисленные типы).
+
+В качестве второго аргумента можно использовать либо один `Double` (значение перцентиля), либо сразу несколько значений перцентиля в виде `Tuple`/`Struct`/`List`.
+
+Значения прецентиля должны лежать в диапазоне от `0.0` до `1.0` включительно.
+
+**Примеры**
``` yql
SELECT
MEDIAN(numeric_column),
- PERCENTILE(numeric_column, 0.99)
+ PERCENTILE(numeric_column, 0.99),
+ PERCENTILE(CAST(string_column as Double), (0.01, 0.5, 0.99)), -- подсчет сразу трех перцентилей
+ PERCENtILE(numeric_column, AsStruct(0.01 as p01, 0.5 as median, 0.99 as p99)), -- используя структуру, значениям перцентиля можно дать удобные имена
+ PERCENTILE(numeric_column, ListFromRange(0.00, 1.05, 0.05)), -- подсчет множества перцентилей (от 0.0 до 1.0 включительно с шагом 0.05)
FROM my_table;
```
diff --git a/ydb/library/yql/mount/lib/yql/aggregate.yql b/ydb/library/yql/mount/lib/yql/aggregate.yql
index a98db5fa4d..b43c91ce40 100644
--- a/ydb/library/yql/mount/lib/yql/aggregate.yql
+++ b/ydb/library/yql/mount/lib/yql/aggregate.yql
@@ -368,6 +368,8 @@
(let merge (lambda '(one two) (Apply (Udf 'Stat.TDigest_Merge) one two)))
(let finish (lambda '(state) (MatchType n
'Tuple (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n))))
+ 'Struct (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n))))
+ 'List (lambda '() (OrderedMap n (lambda '(n) (Apply get_convert_percentile state n))))
(lambda '() (Apply get_convert_percentile state n)))))
(return (AggregationTraits (ListItemType list_type) init update save load merge finish (Null)))
))))
diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
index f41c5f9dd1..8b56a86c3f 100644
--- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
+++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
@@ -2505,6 +2505,13 @@
"uri": "https://storage.yandex-team.ru/get-devtools/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_aggregate-percentile_interval_/sql.yql"
}
],
+ "test_sql2yql.test[aggregate-percentiles_containers]": [
+ {
+ "checksum": "6769c75a2308dcc142c7ff777b855721",
+ "size": 6031,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1784117/5fac73a22f194fa5439186fe33ef2e6bf62271e1/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_containers_/sql.yql"
+ }
+ ],
"test_sql2yql.test[aggregate-percentiles_grouped]": [
{
"checksum": "ebde86f573b7ae4e2377b469d7808202",
@@ -19403,6 +19410,13 @@
"uri": "https://storage.yandex-team.ru/get-devtools/1599023/4bd7e6892a9762eec433e60bda88f20cddadc74c/resource.tar.gz#test_sql_format.test_aggregate-percentile_interval_/formatted.sql"
}
],
+ "test_sql_format.test[aggregate-percentiles_containers]": [
+ {
+ "checksum": "cc2cb2011f3026190a876c533f069785",
+ "size": 657,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1784117/5fac73a22f194fa5439186fe33ef2e6bf62271e1/resource.tar.gz#test_sql_format.test_aggregate-percentiles_containers_/formatted.sql"
+ }
+ ],
"test_sql_format.test[aggregate-percentiles_grouped]": [
{
"checksum": "29b37bc2ae8bf1d5a60f9d4869daee88",
diff --git a/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg
new file mode 100644
index 0000000000..b0152e1177
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.cfg
@@ -0,0 +1,2 @@
+in Input percentiles_input.txt
+udf stat_udf
diff --git a/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql
new file mode 100644
index 0000000000..7bf4633e45
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/aggregate/percentiles_containers.sql
@@ -0,0 +1,16 @@
+select
+ key,
+ median(val) as med,
+ percentile(val, AsTuple(0.2, 0.4, 0.6)) as ptuple,
+ percentile(val, AsStruct(0.2 as p20, 0.4 as p40, 0.6 as p60)) as pstruct,
+ percentile(val, AsList(0.2, 0.4, 0.6)) as plist,
+from (select key, cast(value as int) as val from plato.Input)
+group by key
+order by key;
+
+select
+ median(val) as med,
+ percentile(val, AsTuple(0.2, 0.4, 0.6)) as ptuple,
+ percentile(val, AsStruct(0.2 as p20, 0.4 as p40, 0.6 as p60)) as pstruct,
+ percentile(val, AsList(0.2, 0.4, 0.6)) as plist,
+from (select key, cast(value as int) as val from plato.Input)