about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: fedor-miron <fedor-miron@yandex-team.com> 2023-09-08 12:19:27 +0300
committer: fedor-miron <fedor-miron@yandex-team.com> 2023-09-08 13:09:52 +0300
commit: 5d91ce8329809af2787020c1acdb81266ff8cb6c (patch)
tree: 716890c82bb8bbcda0c20b752873e475997a80a7
parent: 96458135d7e98502174a7ea7a5e79e4198bff277 (diff)
download: ydb-5d91ce8329809af2787020c1acdb81266ff8cb6c.tar.gz
YQL-16259: auto parametrize consts in pg expressions
-rw-r--r-- ydb/library/yql/ast/yql_ast.h | 2
-rw-r--r-- ydb/library/yql/sql/pg/pg_sql.cpp | 68
-rw-r--r-- ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp | 180
-rw-r--r-- ydb/library/yql/sql/pg/ut/util.h | 11
-rw-r--r-- ydb/library/yql/sql/settings/translation_settings.h | 1
5 files changed, 199 insertions, 63 deletions
diff --git a/ydb/library/yql/ast/yql_ast.h b/ydb/library/yql/ast/yql_ast.h
index 40866252a42..2a2b3926354 100644
--- a/ydb/library/yql/ast/yql_ast.h
+++ b/ydb/library/yql/ast/yql_ast.h
@@ -154,7 +154,7 @@ struct TAstNode {
if (childrenCount) {
if (childrenCount > SmallListCount) {
poolChildren = pool.AllocateArray<TAstNode*>(childrenCount);
- memcpy(poolChildren, children, sizeof(TAstNode*) * childrenCount);
+ memcpy(poolChildren, children, sizeof(void*) * childrenCount);
} else {
poolChildren = children;
}
diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp
index 5d6f32dab51..d80f314434a 100644
--- a/ydb/library/yql/sql/pg/pg_sql.cpp
+++ b/ydb/library/yql/sql/pg/pg_sql.cpp
@@ -210,7 +210,7 @@ public:
};
struct TPgConst {
- TString value;
+ TMaybe<TString> value;
enum class Type {
int4,
int8,
@@ -485,6 +485,13 @@ public:
return columnTypes;
}
+ using TAutoParamName = TString;
+ TAutoParamName AddAutoParam(Ydb::TypedValue&& val) {
+ auto nextName = TString(AUTO_PARAM_PREFIX) + ToString(AutoParamValues.size());
+ AutoParamValues.emplace(nextName, std::move(val));
+ return nextName;
+ }
+
TAstNode* MakeValuesStmtAutoParam(TVector<TPgConst>&& values, TVector<TPgConst::Type>&& columnTypes) {
TVector<Ydb::Value> ydbValues;
for (auto&& pgConst : values) {
@@ -493,7 +500,7 @@ public:
if (pgConst.type == TPgConst::Type::nil) {
literal.set_null_flag_value(NProtoBuf::NULL_VALUE);
} else {
- literal.set_text_value(std::move(pgConst.value));
+ literal.set_text_value(std::move(pgConst.value.GetRef()));
}
ydbValues.push_back(literal);
}
@@ -501,11 +508,14 @@ public:
TVector<TAstNode*> autoParamTupleType;
autoParamTupleType.reserve(columnTypes.size());
autoParamTupleType.push_back(A("TupleType"));
+ for (const auto& type : columnTypes) {
+ auto pgType = L(A("PgType"), QA(TPgConst::ToString(type)));
+ autoParamTupleType.push_back(pgType);
+ }
+ const auto paramType = L(A("ListType"), VL(autoParamTupleType));
- const auto paramName = TString(AUTO_PARAM_PREFIX) + ToString(AutoParamValues.size());
- AutoParamValues[paramName] = MakeYdbListTupleParamValue(std::move(ydbValues), std::move(columnTypes));
-
- Statements.push_back(L(A("declare"), A(paramName), L(A("ListType"), VL(autoParamTupleType))));
+ const auto paramName = AddAutoParam(MakeYdbListTupleParamValue(std::move(ydbValues), std::move(columnTypes)));
+ Statements.push_back(L(A("declare"), A(paramName), paramType));
YQL_CLOG(INFO, Default) << "Successfully autoparametrized VALUES at" << Positions.back();
@@ -2542,7 +2552,7 @@ public:
TAstNode* ParseExpr(const Node* node, const TExprSettings& settings) {
switch (NodeTag(node)) {
case T_A_Const: {
- return ParseAConst(CAST_NODE(A_Const, node));
+ return ParseAConst(CAST_NODE(A_Const, node), settings);
}
case T_A_Expr: {
return ParseAExpr(CAST_NODE(A_Expr, node), settings);
@@ -2608,7 +2618,7 @@ public:
}
case T_String: {
pgConst.value = ToString(StrVal(val));
- pgConst.type = TPgConst::Type::unknown;
+ pgConst.type = TPgConst::Type::unknown; // to support implicit casts
return pgConst;
}
case T_Null: {
@@ -2621,26 +2631,58 @@ public:
}
}
}
+
+ TAstNode* AutoParametrizeConst(TPgConst&& valueNType, TAstNode* pgType) {
+ Ydb::TypedValue typedValue;
+
+ auto oid = NPg::LookupType(TPgConst::ToString(valueNType.type)).TypeId;
+ typedValue.mutable_type()->mutable_pg_type()->set_oid(oid);
+
+ auto* value = typedValue.mutable_value();
+ if (valueNType.value) {
+ value->set_text_value(std::move(valueNType.value.GetRef()));
+ } else {
+ Y_VERIFY(valueNType.type == TPgConst::Type::unknown, "NULL is allowed to only be of unknown type");
+ value->set_null_flag_value(NProtoBuf::NULL_VALUE);
+ }
+
+ const auto& paramName = AddAutoParam(std::move(typedValue));
+ Statements.push_back(L(A("declare"), A(paramName), pgType));
+
+ YQL_CLOG(INFO, Default) << "Autoparametrized " << paramName << " at " << Positions.back();
+
+ return A(paramName);
+ }
- TAstNode* ParseAConst(const A_Const* value) {
+ TAstNode* ParseAConst(const A_Const* value, const TExprSettings& settings) {
AT_LOCATION(value);
const auto& val = value->val;
auto valueNType = GetValueNType(value);
if (!valueNType) {
return nullptr;
}
+
+ TAstNode* pgTypeNode = NodeTag(val) != T_Null
+ ? L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))
+ : L(A("PgType"), QA("unknown"));
+
+ if (Settings.AutoParametrizeEnabled &&
+ Settings.AutoParametrizeEnabledScopes.contains(settings.Scope)) {
+ return AutoParametrizeConst(std::move(valueNType.GetRef()), pgTypeNode);
+ }
+
switch (NodeTag(val)) {
case T_Integer: {
- return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type))));
+ return L(A("PgConst"), QA(valueNType->value.GetRef()), pgTypeNode);
}
case T_Float: {
- return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type))));
+ return L(A("PgConst"), QA(valueNType->value.GetRef()), pgTypeNode);
}
case T_String: {
- return L(A("PgConst"), QAX(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type))));
+ return L(A("PgConst"), QAX(valueNType->value.GetRef()), pgTypeNode);
}
case T_Null: {
- return L(A("PgCast"), L(A("Null")), L(A("PgType"), QA("unknown")));
+ return L(A("PgCast"), L(A("Null")), pgTypeNode);
}
default: {
ValueNotImplemented(value, val);
diff --git a/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp
index 258d330acce..53cc47b5646 100644
--- a/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp
+++ b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp
@@ -8,17 +8,17 @@
using namespace NSQLTranslation;
Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
- Y_UNIT_TEST(AutoParamStmt_DisabledByDefault) {
+ Y_UNIT_TEST(AutoParamValues_DisabledByDefault) {
auto res = PgSqlToYql("insert into plato.Output values (1,2,3), (1,2,3)");
UNIT_ASSERT_C(res.Issues.Empty(), "Failed to parse statement, issues: " + res.Issues.ToString());
UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization");
}
- Y_UNIT_TEST(AutoParamStmt_DifferentTypes) {
+ Y_UNIT_TEST(AutoParamValues_DifferentTypes) {
TTranslationSettings settings;
settings.AutoParametrizeEnabled = true;
auto res = SqlToYqlWithMode(
- R"(insert into plato.Output values (1,2,3), (1,'2',3))",
+ R"(insert into plato.Output values (1,2,3), (1,2.0,3))",
NSQLTranslation::ESqlMode::QUERY,
10,
{},
@@ -30,9 +30,46 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization");
}
- void TestAutoParam(const TString& query, const THashMap<TString, TString>& expectedParamNameToJsonYdbVal) {
+ using TUsedParamsGetter = std::function<void(TSet<TString>&, const NYql::TAstNode& node)>;
+
+ void GetUsedParamsInValues(TSet<TString>& usedParams, const NYql::TAstNode& node) {
+ const bool isPgSetItem =
+ node.IsListOfSize(2) && node.GetChild(0)->IsAtom()
+ && node.GetChild(0)->GetContent() == "PgSetItem";
+ if (!isPgSetItem) {
+ return;
+ }
+ const auto pgSetItemOptions = node.GetChild(1)->GetChild(1);
+
+ for (const auto* pgOption : pgSetItemOptions->GetChildren()) {
+ const bool isQuotedList =
+ pgOption->IsListOfSize(2) && pgOption->GetChild(0)->IsAtom()
+ && pgOption->GetChild(0)->GetContent() == "quote";
+ if (!isQuotedList) {
+ return;
+ }
+
+ const auto* option = pgOption->GetChild(1);
+ const auto* optionName = option->GetChild(0);
+
+ const bool isValuesNode =
+ optionName->IsListOfSize(2) && optionName->GetChild(0)->IsAtom()
+ && optionName->GetChild(0)->GetContent() == "quote"
+ && optionName->GetChild(1)->GetContent() == "values";
+ if (!isValuesNode) {
+ return;
+ }
+ const auto values = option->GetChild(2);
+ if (values->IsAtom()) {
+ usedParams.insert(TString(values->GetContent()));
+ }
+ }
+ }
+
+ void TestAutoParam(const TString& query, const THashMap<TString, TString>& expectedParamNameToJsonYdbVal, const TMap<TString, TString>& expectedParamTypes, TUsedParamsGetter usedParamsGetter, THashSet<TString> enabledParametrizeScopes = {}) {
TTranslationSettings settings;
settings.AutoParametrizeEnabled = true;
+ settings.AutoParametrizeEnabledScopes = enabledParametrizeScopes;
auto res = SqlToYqlWithMode(
query,
NSQLTranslation::ESqlMode::QUERY,
@@ -45,51 +82,23 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
UNIT_ASSERT_C(res.PgAutoParamValues && !res.PgAutoParamValues->empty(), "Expected auto param values");
TSet<TString> declaredParams;
- VisitAstNodes(*res.Root, [&declaredParams] (const NYql::TAstNode& node) {
+ TMap<TString, TString> actualParamTypes;
+ VisitAstNodes(*res.Root, [&declaredParams, &actualParamTypes] (const NYql::TAstNode& node) {
const bool isDeclareNode =
node.IsList() && node.GetChildrenCount() > 0
&& node.GetChild(0)->IsAtom()
&& node.GetChild(0)->GetContent() == "declare";
if (isDeclareNode) {
UNIT_ASSERT_VALUES_EQUAL(node.GetChildrenCount(), 3);
- declaredParams.insert(TString(node.GetChild(1)->GetContent()));
+ const auto name = TString(node.GetChild(1)->GetContent());
+ declaredParams.insert(name);
+ actualParamTypes[name] = node.GetChild(2)->ToString();
}
});
+ UNIT_ASSERT_VALUES_EQUAL(expectedParamTypes, actualParamTypes);
TSet<TString> usedParams;
- VisitAstNodes(*res.Root, [&usedParams] (const NYql::TAstNode& node) {
- const bool isPgSetItem =
- node.IsListOfSize(2) && node.GetChild(0)->IsAtom()
- && node.GetChild(0)->GetContent() == "PgSetItem";
- if (!isPgSetItem) {
- return;
- }
- const auto pgSetItemOptions = node.GetChild(1)->GetChild(1);
-
- for (const auto* pgOption : pgSetItemOptions->GetChildren()) {
- const bool isQuotedList =
- pgOption->IsListOfSize(2) && pgOption->GetChild(0)->IsAtom()
- && pgOption->GetChild(0)->GetContent() == "quote";
- if (!isQuotedList) {
- return;
- }
-
- const auto* option = pgOption->GetChild(1);
- const auto& optionName = option->GetChild(0);
-
- const bool isValuesNode =
- optionName->IsListOfSize(2) && optionName->GetChild(0)->IsAtom()
- && optionName->GetChild(0)->GetContent() == "quote"
- && optionName->GetChild(1)->GetContent() == "values";
- if (!isValuesNode) {
- return;
- }
- const auto values = option->GetChild(2);
- if (values->IsAtom()) {
- usedParams.insert(TString(values->GetContent()));
- }
- }
- });
+ VisitAstNodes(*res.Root, [&usedParams, &usedParamsGetter] (const auto& node) { return usedParamsGetter(usedParams, node); });
UNIT_ASSERT_VALUES_EQUAL(declaredParams, usedParams);
TMap<TString, Ydb::TypedValue> expectedParams;
@@ -109,25 +118,27 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
UNIT_ASSERT_VALUES_EQUAL(declaredParams.size(), expectedParams.size());
}
- Y_UNIT_TEST(AutoParamStmt_Int4) {
+ Y_UNIT_TEST(AutoParamValues_Int4) {
TString query = R"(insert into plato.Output values (1,2), (3,4), (4,5))";
TString expectedParamJson = R"(
{"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":23}}]}}}},
"value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]},{"items":[{"text_value":"4"},{"text_value":"5"}]}]}}
)";
- TestAutoParam(query, {{"a0", expectedParamJson}});
+ TString type = "(ListType (TupleType (PgType 'int4) (PgType 'int4)))";
+ TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues);
}
- Y_UNIT_TEST(AutoParamStmt_Int4Text) {
+ Y_UNIT_TEST(AutoParamValues_Int4Text) {
TString query = R"(insert into plato.Output values (1,'2'), (3,'4'))";
TString expectedParamJson = R"(
{"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":705}}]}}}},
"value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}}
)";
- TestAutoParam(query, {{"a0", expectedParamJson}});
+ TString type = "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))";
+ TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues);
}
- Y_UNIT_TEST(AutoParamStmt_MultipleStmts) {
+ Y_UNIT_TEST(AutoParamValues_MultipleStmts) {
TString query = R"(
insert into plato.Output values (1,'2'), (3,'4');
insert into plato.Output1 values (1.23);
@@ -140,10 +151,14 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
{"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":1700}}]}}}},
"value":{"items":[{"items":[{"text_value":"1.23"}]}]}}
)";
- TestAutoParam(query, {{"a0", expectedParamJson0}, {"a1", expectedParamJson1}});
+ TMap<TString, TString> expectedParamTypes {
+ {"a0", "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))"},
+ {"a1", "(ListType (TupleType (PgType 'numeric)))"}
+ };
+ TestAutoParam(query, {{"a0", expectedParamJson0}, {"a1", expectedParamJson1}}, expectedParamTypes, GetUsedParamsInValues);
}
- Y_UNIT_TEST(AutoParamStmt_WithNull) {
+ Y_UNIT_TEST(AutoParamValues_WithNull) {
TString query = R"(
insert into plato.Output values (null, '2'), (3, '4')
)";
@@ -151,10 +166,11 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
{"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":705}}]}}}},
"value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}}
)";
- TestAutoParam(query, {{"a0", expectedParamJson}});
+ TString type = "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))";
+ TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues);
}
- Y_UNIT_TEST(AutoParamStmt_NullCol) {
+ Y_UNIT_TEST(AutoParamValues_NullCol) {
TString query = R"(
insert into plato.Output values (null,1), (null,1)
)";
@@ -162,6 +178,72 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) {
{"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":705}},{"pg_type":{"oid":23}}]}}}},
"value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]},{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]}]}}
)";
- TestAutoParam(query, {{"a0", expectedParamJson}});
+ TString type = "(ListType (TupleType (PgType 'unknown) (PgType 'int4)))";
+ TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues);
+ }
+
+ Y_UNIT_TEST(AutoParamConsts_Where) {
+ TString query = R"(
+ select * from plato.Output where key > 1
+ )";
+ TString expectedParamJson = R"(
+ {"type":{"pg_type": {"oid": 23}},
+ "value":{"text_value": "1"}}
+ )";
+ THashSet<TString> enabledScopes {"WHERE"};
+
+ // We expect: (PgOp '">" (PgColumnRef '"key") a0)
+ const TUsedParamsGetter usedInWhereComp = [] (TSet<TString>& usedParams, const NYql::TAstNode& node) {
+ const auto maybeQuote = MaybeGetQuotedValue(node);
+ if (!maybeQuote) {
+ return;
+ }
+ const auto quotedVal = maybeQuote.GetRef();
+ const bool isWhere =
+ quotedVal->IsListOfSize(2) && quotedVal->GetChild(1)->IsListOfSize(3)
+ && quotedVal->GetChild(1)->IsListOfSize(3)
+ && quotedVal->GetChild(1)->GetChild(0)->IsAtom()
+ && quotedVal->GetChild(1)->GetChild(0)->GetContent() == "PgWhere";
+
+ if (!isWhere) {
+ return;
+ }
+ const auto* whereCallable = quotedVal->GetChild(1);
+
+ const auto* whereLambda = whereCallable->GetChild(2);
+ const auto* pgOp = whereLambda->GetChild(2);
+ const bool isBinaryOp = pgOp->IsListOfSize(4);
+ if (!isBinaryOp) {
+ return;
+ }
+ const auto* pgBinOpSecondArg = pgOp->GetChild(3);
+ usedParams.insert(TString(pgBinOpSecondArg->GetContent()));
+ };
+ TString type = "(PgType 'int4)";
+ TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, usedInWhereComp, enabledScopes);
}
+
+ Y_UNIT_TEST(AutoParamConsts_Select) {
+ TString query = R"(
+ select 1, 'test'
+ )";
+ TString expectedParamJsonInt4 = R"(
+ {"type":{"pg_type": {"oid": 23}},
+ "value":{"text_value": "1"}}
+ )";
+ TString expectedParamJsonText = R"(
+ {"type":{"pg_type": {"oid": 705}},
+ "value":{"text_value": "test"}}
+ )";
+ THashSet<TString> enabledScopes {"SELECT"};
+ const TUsedParamsGetter dummyGetter = [] (TSet<TString>& usedParams, const NYql::TAstNode&) {
+ usedParams = {"a0", "a1"};
+ };
+ TMap<TString, TString> expectedParamTypes {
+ {"a0", "(PgType 'int4)"},
+ {"a1", "(PgType 'unknown)"},
+ };
+ TestAutoParam(query, {{"a0", expectedParamJsonInt4}, {"a1", expectedParamJsonText}}, expectedParamTypes, dummyGetter, enabledScopes);
+ }
+
}
diff --git a/ydb/library/yql/sql/pg/ut/util.h b/ydb/library/yql/sql/pg/ut/util.h
index ace07033a63..19f636483c1 100644
--- a/ydb/library/yql/sql/pg/ut/util.h
+++ b/ydb/library/yql/sql/pg/ut/util.h
@@ -59,3 +59,14 @@ inline void VisitAstNodes(const NYql::TAstNode& root, const TAstNodeVisitFunc& v
VisitAstNodes(*root.GetChild(childIdx), visitFunc);
}
}
+
+
+inline TMaybe<const NYql::TAstNode*> MaybeGetQuotedValue(const NYql::TAstNode& node) {
+ const bool isQuotedList =
+ node.IsListOfSize(2) && node.GetChild(0)->IsAtom()
+ && node.GetChild(0)->GetContent() == "quote";
+ if (isQuotedList) {
+ return node.GetChild(1);
+ }
+ return {};
+}
diff --git a/ydb/library/yql/sql/settings/translation_settings.h b/ydb/library/yql/sql/settings/translation_settings.h
index 7029acfa299..76f626e0c92 100644
--- a/ydb/library/yql/sql/settings/translation_settings.h
+++ b/ydb/library/yql/sql/settings/translation_settings.h
@@ -107,6 +107,7 @@ namespace NSQLTranslation {
TVector<ui32> PgParameterTypeOids;
bool AutoParametrizeEnabled = false;
+ THashSet<TString> AutoParametrizeEnabledScopes = {};
};
bool ParseTranslationSettings(const TString& query, NSQLTranslation::TTranslationSettings& settings, NYql::TIssues& issues);