summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vitaly Stoyan <[email protected]> 2024-09-11 02:30:48 +0300
committer: GitHub <[email protected]> 2024-09-10 23:30:48 +0000
commit: a8fb87e9659374402d5c50524b7be8e4b4c8944f (patch)
tree: edf81465db266362150994556c2677f54070dbae
parent: f9cd2486c0a85b57390dd0d9aa72fe51c8ad4f3b (diff)
Support of table functions in pg syntax (#9040)
-rw-r--r-- ydb/library/yql/core/type_ann/type_ann_core.cpp | 2
-rw-r--r-- ydb/library/yql/sql/pg/pg_sql.cpp | 159
-rw-r--r-- ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json | 22
-rw-r--r-- ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json | 14
-rw-r--r-- ydb/library/yql/tests/sql/sql2yql/canondata/result.json | 7
-rw-r--r-- ydb/library/yql/tests/sql/suites/pg/table_func.sql | 15
-rw-r--r-- ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json | 21
7 files changed, 224 insertions, 16 deletions
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp
index 654ec85e42e..9571bd75a8f 100644
--- a/ydb/library/yql/core/type_ann/type_ann_core.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp
@@ -12715,7 +12715,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
ColumnOrderFunctions["Merge"] = ColumnOrderFunctions["Extend"] = &OrderForMergeExtend;
ColumnOrderFunctions[RightName] = &OrderFromFirst;
- ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
+ ColumnOrderFunctions["UnionMerge"] = ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
ColumnOrderFunctions["Union"] = &OrderForUnionAll;
ColumnOrderFunctions["EquiJoin"] = &OrderForEquiJoin;
ColumnOrderFunctions["CalcOverWindow"] = &OrderForCalcOverWindow;
diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp
index a599e8dc242..4bb649bac2e 100644
--- a/ydb/library/yql/sql/pg/pg_sql.cpp
+++ b/ydb/library/yql/sql/pg/pg_sql.cpp
@@ -1520,12 +1520,9 @@ public:
}
} else if (NodeTag(r->val) == T_FuncCall) {
auto func = CAST_NODE(FuncCall, r->val);
- TVector<TString> names;
- if (!ExtractFuncName(func, names)) {
+ if (!ExtractFuncName(func, name, nullptr)) {
return nullptr;
}
-
- name = names.back();
}
}
@@ -3427,12 +3424,13 @@ public:
return {};
}
- auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true);
+ bool injectRead = false;
+ auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true, injectRead);
if (!func) {
return {};
}
- return TFromDesc{ func, alias, colnames, false };
+ return TFromDesc{ func, alias, colnames, injectRead };
}
TMaybe<TFromDesc> ParseRangeSubselect(const RangeSubselect* value) {
@@ -3723,7 +3721,8 @@ public:
return ParseNullTestExpr(CAST_NODE(NullTest, node), settings);
}
case T_FuncCall: {
- return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false);
+ bool injectRead;
+ return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false, injectRead);
}
case T_A_ArrayExpr: {
return ParseAArrayExpr(CAST_NODE(A_ArrayExpr, node), settings);
@@ -4009,7 +4008,124 @@ public:
return L(A("PgSubLink"), QA(linkType), L(A("Void")), L(A("Void")), rowTest, L(A("lambda"), QL(), select));
}
- TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction) {
+ TAstNode* ParseTableRangeFunction(const TString& name, const TString& schema, List* args) { // Builds a Read! node for a schema-qualified table function (concat, concat_view, range, regexp, like); returns nullptr on failure.
+ auto source = BuildClusterSinkOrSourceExpression(false, schema); // NOTE(review): schema presumably names the cluster and `false` presumably selects a source rather than a sink - confirm against the helper
+ if (!source) {
+ return nullptr;
+ }
+ // Every argument must be a string constant; the raw values are collected in call order.
+ TVector<TString> argStrs;
+ for (int i = 0; i < ListLength(args); ++i) {
+ auto arg = ListNodeNth(args, i);
+ if (NodeTag(arg) == T_A_Const && (NodeTag(CAST_NODE(A_Const, arg)->val) == T_String)) {
+ TString rawStr = StrVal(CAST_NODE(A_Const, arg)->val);
+ argStrs.push_back(rawStr);
+ } else {
+ AddError("Expected String argument for table function");
+ return nullptr;
+ }
+ }
+ // All supported functions read argStrs[0], so an empty argument list is rejected up front.
+ if (argStrs.empty()) {
+ AddError("Expected at least one argument for table function");
+ return nullptr;
+ }
+
+ TAstNode* key; // assigned by every branch below that does not return early
+ auto lowerName = to_lower(name); // the function name is matched in lower case
+ auto options = QL(); // empty by default; range/regexp/like replace it with ignorenonexisting
+ if (lowerName == "concat") { // concat(t1, t2, ...): one Key per table inside MrTableConcat
+ TVector<TAstNode*> concatArgs;
+ concatArgs.push_back(A("MrTableConcat"));
+ for (const auto& s : argStrs) {
+ concatArgs.push_back(L(A("Key"), QL(QA("table"),L(A("String"), QAX(s)))));
+ }
+
+ key = VL(concatArgs);
+ } else if (lowerName == "concat_view") { // concat_view(t1, v1, t2, v2, ...): arguments are consumed as (table, view) pairs
+ if (argStrs.size() % 2 != 0) {
+ AddError("Expected sequence of pairs of table and view for concat_view");
+ return nullptr;
+ }
+
+ TVector<TAstNode*> concatArgs;
+ concatArgs.push_back(A("MrTableConcat"));
+ for (ui32 i = 0; i < argStrs.size(); i += 2) { // i + 1 is in range because the size was checked to be even
+ concatArgs.push_back(L(A("Key"),
+ QL(QA("table"),L(A("String"), QAX(argStrs[i]))),
+ QL(QA("view"),L(A("String"), QAX(argStrs[i + 1])))));
+ }
+
+ key = VL(concatArgs);
+ } else if (lowerName == "range") { // range(a0[, min[, max[, a3[, view]]]]): 1 to 5 arguments
+ if (argStrs.size() > 5) {
+ AddError("Too many arguments");
+ return nullptr;
+ }
+
+ options = QL(QL(QA("ignorenonexisting")));
+ TAstNode* expr; // body of the per-item filter lambda
+ if (argStrs.size() == 1) { // no bounds: the filter is always true
+ expr = L(A("Bool"),QA("true"));
+ } else if (argStrs.size() == 2) { // lower bound only: item >= argStrs[1]
+ expr = L(A(">="),A("item"),L(A("String"),QAX(argStrs[1])));
+ } else { // three or more arguments: argStrs[1] <= item <= argStrs[2]
+ expr = L(A("And"),
+ L(A(">="),A("item"),L(A("String"),QAX(argStrs[1]))),
+ L(A("<="),A("item"),L(A("String"),QAX(argStrs[2])))
+ );
+ }
+
+ auto lambda = L(A("lambda"), QL(A("item")), expr);
+ auto range = L(A("MrTableRange"), QAX(argStrs[0]), lambda, QAX(argStrs.size() < 4 ? "" : argStrs[3])); // the 4th argument falls back to "" when omitted
+ if (argStrs.size() < 5) { // the 5th argument, when present, becomes the view
+ key = L(A("Key"), QL(QA("table"),range));
+ } else {
+ key = L(A("Key"), QL(QA("table"),range), QL(QA("view"),L(A("String"), QAX(argStrs[4]))));
+ }
+ } else if (lowerName == "regexp" || lowerName == "like") { // regexp|like(a0, pattern[, a2[, view]]): 2 to 4 arguments
+ if (argStrs.size() < 2 || argStrs.size() > 4) {
+ AddError("Expected from 2 to 4 arguments");
+ return nullptr;
+ }
+
+ options = QL(QL(QA("ignorenonexisting")));
+ TAstNode* expr; // body of the per-item filter lambda
+ if (lowerName == "regexp") { // regexp: the pattern is handed to the Re2.Grep udf as is
+ expr = L(A("Apply"),L(A("Udf"),QA("Re2.Grep"),
+ QL(L(A("String"),QAX(argStrs[1])),L(A("Null")))),
+ A("item"));
+ } else { // like: the pattern goes through Re2.PatternFromLike and the result is given to Re2.Match
+ expr = L(A("Apply"),L(A("Udf"),QA("Re2.Match"),
+ QL(L(A("Apply"),
+ L(A("Udf"), QA("Re2.PatternFromLike")),
+ L(A("String"),QAX(argStrs[1]))),L(A("Null")))),
+ A("item"));
+ }
+
+ auto lambda = L(A("lambda"), QL(A("item")), expr);
+ auto range = L(A("MrTableRange"), QAX(argStrs[0]), lambda, QAX(argStrs.size() < 3 ? "" : argStrs[2])); // the 3rd argument falls back to "" when omitted
+ if (argStrs.size() < 4) { // the 4th argument, when present, becomes the view
+ key = L(A("Key"), QL(QA("table"),range));
+ } else {
+ key = L(A("Key"), QL(QA("table"),range), QL(QA("view"),L(A("String"), QAX(argStrs[3]))));
+ }
+ } else {
+ AddError(TStringBuilder() << "Unknown table function: " << name);
+ return nullptr;
+ }
+ // Assemble the read: world, source, the key built above, a Void placeholder, and the options list.
+ return L(
+ A("Read!"),
+ A("world"),
+ source,
+ key,
+ L(A("Void")),
+ options
+ );
+ }
+
+ TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction, bool& injectRead) {
AT_LOCATION(value);
if (ListLength(value->agg_order) > 0) {
AddError("FuncCall: unsupported agg_order");
@@ -4052,12 +4168,17 @@ public:
}
}
- TVector<TString> names;
- if (!ExtractFuncName(value, names)) {
+ TString name;
+ TString schema;
+ if (!ExtractFuncName(value, name, rangeFunction ? &schema : nullptr)) {
return nullptr;
}
- auto name = names.back();
+ if (rangeFunction && !schema.empty() && schema != "pg_catalog") {
+ injectRead = true;
+ return ParseTableRangeFunction(name, schema, value->args);
+ }
+
if (name == "shobj_description" || name == "obj_description") {
AddWarning(TIssuesIds::PG_COMPAT, name + " function forced to NULL");
return L(A("Null"));
@@ -4159,7 +4280,8 @@ public:
return VL(args.data(), args.size());
}
- bool ExtractFuncName(const FuncCall* value, TVector<TString>& names) {
+ bool ExtractFuncName(const FuncCall* value, TString& name, TString* schemaName) {
+ TVector<TString> names;
for (int i = 0; i < ListLength(value->funcname); ++i) {
auto x = ListNodeNth(value->funcname, i);
if (NodeTag(x) != T_String) {
@@ -4180,11 +4302,18 @@ public:
return false;
}
- if (names.size() == 2 && names[0] != "pg_catalog") {
- AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
- return false;
+ if (names.size() == 2) {
+ if (!schemaName && names[0] != "pg_catalog") {
+ AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
+ return false;
+ }
+
+ if (schemaName) {
+ *schemaName = names[0];
+ }
}
+ name = names.back();
return true;
}
diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
index 1e901fa579b..5c2c84a6b29 100644
--- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
+++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
@@ -2288,6 +2288,28 @@
}
],
"test.test[pg-sublink_having_any-default.txt-Results]": [],
+ "test.test[pg-table_func-default.txt-Analyze]": [
+ {
+ "checksum": "90f90cb0bb8d60304471e5cf9a37436a",
+ "size": 22788,
+ "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Analyze_/plan.txt"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Debug]": [
+ {
+ "checksum": "009e570dc4b46891c5263130b7e90036",
+ "size": 6644,
+ "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Plan]": [
+ {
+ "checksum": "90f90cb0bb8d60304471e5cf9a37436a",
+ "size": 22788,
+ "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Results]": [],
"test.test[pg-tpcds-q20-default.txt-Analyze]": [
{
"checksum": "212be881133a20b5b73ef1250dbeda51",
diff --git a/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json
index a0037920143..6488a67d02d 100644
--- a/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json
+++ b/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json
@@ -2295,6 +2295,20 @@
"uri": "https://{canondata_backend}/1775319/3515b86fb929979a6751f93bd43a0291eaa01262/resource.tar.gz#test.test_pg-sublink_projection_exists_corr-default.txt-Plan_/plan.txt"
}
],
+ "test.test[pg-table_func-default.txt-Debug]": [
+ {
+ "checksum": "f58d79752c5632a904d7c675fd2cd887",
+ "size": 6681,
+ "uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Plan]": [
+ {
+ "checksum": "95e2fb9330b8431fa9d166b01b6a47b0",
+ "size": 19319,
+ "uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
+ }
+ ],
"test.test[pg-tpcds-q07-default.txt-Debug]": [
{
"checksum": "f61d3822f18e6a66d0991534554f20fb",
diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
index b0e06b3a485..1febcffbf95 100644
--- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
+++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
@@ -14076,6 +14076,13 @@
"uri": "https://{canondata_backend}/1881367/79a71c1478c556da1931a7565c12bdd14cc63567/resource.tar.gz#test_sql2yql.test_pg-sublink_where_in_corr_/sql.yql"
}
],
+ "test_sql2yql.test[pg-table_func]": [
+ {
+ "checksum": "52fc030d0a5ec71d08efd26d9f101c65",
+ "size": 8198,
+ "uri": "https://{canondata_backend}/1784826/4a52e4f284dee1aa5ddb5ef05566fbf6d624ec38/resource.tar.gz#test_sql2yql.test_pg-table_func_/sql.yql"
+ }
+ ],
"test_sql2yql.test[pg-tpcds-q01]": [
{
"checksum": "d7a119a877ea0e8b9211601d372e99b9",
diff --git a/ydb/library/yql/tests/sql/suites/pg/table_func.sql b/ydb/library/yql/tests/sql/suites/pg/table_func.sql
new file mode 100644
index 00000000000..3d481c9cf89
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/pg/table_func.sql
@@ -0,0 +1,15 @@
+--!syntax_pg
+select count(*) from plato.concat('Input','Input'); -- concat: one key per listed table
+select count(*) from plato.concat_view('Input','raw','Input','raw'); -- concat_view: arguments are (table, view) pairs
+select count(*) from plato.range(''); -- range, 1 argument: filter is always true
+select count(*) from plato.range('','A'); -- range, 2 arguments: item >= 'A'
+select count(*) from plato.range('','A','Z'); -- range, 3 arguments: 'A' <= item <= 'Z'
+select count(*) from plato.range('','A','Z',''); -- range, 4 arguments: explicit '' equals the default used when omitted
+select count(*) from plato.range('','A','Z','','raw'); -- range, 5 arguments: the last one is the view
+select count(*) from plato.regexp('','Inpu.?'); -- regexp, 2 arguments: pattern is applied with Re2.Grep
+select count(*) from plato.regexp('','Inpu.?',''); -- regexp, 3 arguments: explicit '' equals the default used when omitted
+select count(*) from plato.regexp('','Inpu.?','','raw'); -- regexp, 4 arguments: the last one is the view
+select count(*) from plato.like('','Inpu%'); -- like, 2 arguments: pattern goes through Re2.PatternFromLike, then Re2.Match
+select count(*) from plato.like('','Inpu%',''); -- like, 3 arguments: explicit '' equals the default used when omitted
+select count(*) from plato.like('','Inpu%','','raw'); -- like, 4 arguments: the last one is the view
+
diff --git a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json
index 1aade14c399..4ef8f8834a2 100644
--- a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json
+++ b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json
@@ -2042,6 +2042,27 @@
"uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_pg-sublink_having_any-default.txt-Results_/results.txt"
}
],
+ "test.test[pg-table_func-default.txt-Debug]": [
+ {
+ "checksum": "afed4824bc574f8c4d4470e01e377627",
+ "size": 4991,
+ "uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Plan]": [
+ {
+ "checksum": "bf2b0c772eaf69c15399605d7fbd7b0e",
+ "size": 14773,
+ "uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
+ }
+ ],
+ "test.test[pg-table_func-default.txt-Results]": [
+ {
+ "checksum": "db24edd3094d41f02121a7b1f3629af3",
+ "size": 9490,
+ "uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[pg-tpcds-q20-default.txt-Debug]": [
{
"checksum": "3d98e10d734329d04d97423b4026d52d",