diff options
author | fedor-miron <fedor-miron@yandex-team.com> | 2023-09-08 12:19:27 +0300 |
---|---|---|
committer | fedor-miron <fedor-miron@yandex-team.com> | 2023-09-08 13:09:52 +0300 |
commit | 5d91ce8329809af2787020c1acdb81266ff8cb6c (patch) | |
tree | 716890c82bb8bbcda0c20b752873e475997a80a7 | |
parent | 96458135d7e98502174a7ea7a5e79e4198bff277 (diff) | |
download | ydb-5d91ce8329809af2787020c1acdb81266ff8cb6c.tar.gz |
YQL-16259: auto parametrize consts in pg expressions
-rw-r--r-- | ydb/library/yql/ast/yql_ast.h | 2 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/pg_sql.cpp | 68 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp | 180 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/ut/util.h | 11 | ||||
-rw-r--r-- | ydb/library/yql/sql/settings/translation_settings.h | 1 |
5 files changed, 199 insertions, 63 deletions
diff --git a/ydb/library/yql/ast/yql_ast.h b/ydb/library/yql/ast/yql_ast.h index 40866252a42..2a2b3926354 100644 --- a/ydb/library/yql/ast/yql_ast.h +++ b/ydb/library/yql/ast/yql_ast.h @@ -154,7 +154,7 @@ struct TAstNode { if (childrenCount) { if (childrenCount > SmallListCount) { poolChildren = pool.AllocateArray<TAstNode*>(childrenCount); - memcpy(poolChildren, children, sizeof(TAstNode*) * childrenCount); + memcpy(poolChildren, children, sizeof(void*) * childrenCount); } else { poolChildren = children; } diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index 5d6f32dab51..d80f314434a 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -210,7 +210,7 @@ public: }; struct TPgConst { - TString value; + TMaybe<TString> value; enum class Type { int4, int8, @@ -485,6 +485,13 @@ public: return columnTypes; } + using TAutoParamName = TString; + TAutoParamName AddAutoParam(Ydb::TypedValue&& val) { + auto nextName = TString(AUTO_PARAM_PREFIX) + ToString(AutoParamValues.size()); + AutoParamValues.emplace(nextName, std::move(val)); + return nextName; + } + TAstNode* MakeValuesStmtAutoParam(TVector<TPgConst>&& values, TVector<TPgConst::Type>&& columnTypes) { TVector<Ydb::Value> ydbValues; for (auto&& pgConst : values) { @@ -493,7 +500,7 @@ public: if (pgConst.type == TPgConst::Type::nil) { literal.set_null_flag_value(NProtoBuf::NULL_VALUE); } else { - literal.set_text_value(std::move(pgConst.value)); + literal.set_text_value(std::move(pgConst.value.GetRef())); } ydbValues.push_back(literal); } @@ -501,11 +508,14 @@ public: TVector<TAstNode*> autoParamTupleType; autoParamTupleType.reserve(columnTypes.size()); autoParamTupleType.push_back(A("TupleType")); + for (const auto& type : columnTypes) { + auto pgType = L(A("PgType"), QA(TPgConst::ToString(type))); + autoParamTupleType.push_back(pgType); + } + const auto paramType = L(A("ListType"), VL(autoParamTupleType)); - const auto paramName = TString(AUTO_PARAM_PREFIX) + ToString(AutoParamValues.size()); - AutoParamValues[paramName] = MakeYdbListTupleParamValue(std::move(ydbValues), std::move(columnTypes)); - - Statements.push_back(L(A("declare"), A(paramName), L(A("ListType"), VL(autoParamTupleType)))); + const auto paramName = AddAutoParam(MakeYdbListTupleParamValue(std::move(ydbValues), std::move(columnTypes))); + Statements.push_back(L(A("declare"), A(paramName), paramType)); YQL_CLOG(INFO, Default) << "Successfully autoparametrized VALUES at" << Positions.back(); @@ -2542,7 +2552,7 @@ public: TAstNode* ParseExpr(const Node* node, const TExprSettings& settings) { switch (NodeTag(node)) { case T_A_Const: { - return ParseAConst(CAST_NODE(A_Const, node)); + return ParseAConst(CAST_NODE(A_Const, node), settings); } case T_A_Expr: { return ParseAExpr(CAST_NODE(A_Expr, node), settings); @@ -2608,7 +2618,7 @@ public: } case T_String: { pgConst.value = ToString(StrVal(val)); - pgConst.type = TPgConst::Type::unknown; + pgConst.type = TPgConst::Type::unknown; // to support implicit casts return pgConst; } case T_Null: { @@ -2621,26 +2631,58 @@ public: } } } + + TAstNode* AutoParametrizeConst(TPgConst&& valueNType, TAstNode* pgType) { + Ydb::TypedValue typedValue; + + auto oid = NPg::LookupType(TPgConst::ToString(valueNType.type)).TypeId; + typedValue.mutable_type()->mutable_pg_type()->set_oid(oid); + + auto* value = typedValue.mutable_value(); + if (valueNType.value) { + value->set_text_value(std::move(valueNType.value.GetRef())); + } else { + Y_VERIFY(valueNType.type == TPgConst::Type::unknown, "NULL is allowed to only be of unknown type"); + value->set_null_flag_value(NProtoBuf::NULL_VALUE); + } + + const auto& paramName = AddAutoParam(std::move(typedValue)); + Statements.push_back(L(A("declare"), A(paramName), pgType)); + + YQL_CLOG(INFO, Default) << "Autoparametrized " << paramName << " at " << Positions.back(); + + return A(paramName); + } - TAstNode* ParseAConst(const A_Const* value) { + TAstNode* ParseAConst(const A_Const* value, const TExprSettings& settings) { AT_LOCATION(value); const auto& val = value->val; auto valueNType = GetValueNType(value); if (!valueNType) { return nullptr; } + + TAstNode* pgTypeNode = NodeTag(val) != T_Null + ? L(A("PgType"), QA(TPgConst::ToString(valueNType->type))) + : L(A("PgType"), QA("unknown")); + + if (Settings.AutoParametrizeEnabled && + Settings.AutoParametrizeEnabledScopes.contains(settings.Scope)) { + return AutoParametrizeConst(std::move(valueNType.GetRef()), pgTypeNode); + } + switch (NodeTag(val)) { case T_Integer: { - return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + return L(A("PgConst"), QA(valueNType->value.GetRef()), pgTypeNode); } case T_Float: { - return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + return L(A("PgConst"), QA(valueNType->value.GetRef()), pgTypeNode); } case T_String: { - return L(A("PgConst"), QAX(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + return L(A("PgConst"), QAX(valueNType->value.GetRef()), pgTypeNode); } case T_Null: { - return L(A("PgCast"), L(A("Null")), L(A("PgType"), QA("unknown"))); + return L(A("PgCast"), L(A("Null")), pgTypeNode); } default: { ValueNotImplemented(value, val); diff --git a/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp index 258d330acce..53cc47b5646 100644 --- a/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp +++ b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp @@ -8,17 +8,17 @@ using namespace NSQLTranslation; Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { - Y_UNIT_TEST(AutoParamStmt_DisabledByDefault) { + Y_UNIT_TEST(AutoParamValues_DisabledByDefault) { auto res = PgSqlToYql("insert into plato.Output values (1,2,3), (1,2,3)"); UNIT_ASSERT_C(res.Issues.Empty(), "Failed to parse statement, issues: " + res.Issues.ToString()); UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization"); } - Y_UNIT_TEST(AutoParamStmt_DifferentTypes) { + Y_UNIT_TEST(AutoParamValues_DifferentTypes) { TTranslationSettings settings; settings.AutoParametrizeEnabled = true; auto res = SqlToYqlWithMode( - R"(insert into plato.Output values (1,2,3), (1,'2',3))", + R"(insert into plato.Output values (1,2,3), (1,2.0,3))", NSQLTranslation::ESqlMode::QUERY, 10, {}, @@ -30,9 +30,46 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization"); } - void TestAutoParam(const TString& query, const THashMap<TString, TString>& expectedParamNameToJsonYdbVal) { + using TUsedParamsGetter = std::function<void(TSet<TString>&, const NYql::TAstNode& node)>; + + void GetUsedParamsInValues(TSet<TString>& usedParams, const NYql::TAstNode& node) { + const bool isPgSetItem = + node.IsListOfSize(2) && node.GetChild(0)->IsAtom() + && node.GetChild(0)->GetContent() == "PgSetItem"; + if (!isPgSetItem) { + return; + } + const auto pgSetItemOptions = node.GetChild(1)->GetChild(1); + + for (const auto* pgOption : pgSetItemOptions->GetChildren()) { + const bool isQuotedList = + pgOption->IsListOfSize(2) && pgOption->GetChild(0)->IsAtom() + && pgOption->GetChild(0)->GetContent() == "quote"; + if (!isQuotedList) { + return; + } + + const auto* option = pgOption->GetChild(1); + const auto* optionName = option->GetChild(0); + + const bool isValuesNode = + optionName->IsListOfSize(2) && optionName->GetChild(0)->IsAtom() + && optionName->GetChild(0)->GetContent() == "quote" + && optionName->GetChild(1)->GetContent() == "values"; + if (!isValuesNode) { + return; + } + const auto values = option->GetChild(2); + if (values->IsAtom()) { + usedParams.insert(TString(values->GetContent())); + } + } + } + + void TestAutoParam(const TString& query, const THashMap<TString, TString>& expectedParamNameToJsonYdbVal, const TMap<TString, TString>& expectedParamTypes, TUsedParamsGetter usedParamsGetter, THashSet<TString> enabledParametrizeScopes = {}) { TTranslationSettings settings; settings.AutoParametrizeEnabled = true; + settings.AutoParametrizeEnabledScopes = enabledParametrizeScopes; auto res = SqlToYqlWithMode( query, NSQLTranslation::ESqlMode::QUERY, @@ -45,51 +82,23 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { UNIT_ASSERT_C(res.PgAutoParamValues && !res.PgAutoParamValues->empty(), "Expected auto param values"); TSet<TString> declaredParams; - VisitAstNodes(*res.Root, [&declaredParams] (const NYql::TAstNode& node) { + TMap<TString, TString> actualParamTypes; + VisitAstNodes(*res.Root, [&declaredParams, &actualParamTypes] (const NYql::TAstNode& node) { const bool isDeclareNode = node.IsList() && node.GetChildrenCount() > 0 && node.GetChild(0)->IsAtom() && node.GetChild(0)->GetContent() == "declare"; if (isDeclareNode) { UNIT_ASSERT_VALUES_EQUAL(node.GetChildrenCount(), 3); - declaredParams.insert(TString(node.GetChild(1)->GetContent())); + const auto name = TString(node.GetChild(1)->GetContent()); + declaredParams.insert(name); + actualParamTypes[name] = node.GetChild(2)->ToString(); } }); + UNIT_ASSERT_VALUES_EQUAL(expectedParamTypes, actualParamTypes); TSet<TString> usedParams; - VisitAstNodes(*res.Root, [&usedParams] (const NYql::TAstNode& node) { - const bool isPgSetItem = - node.IsListOfSize(2) && node.GetChild(0)->IsAtom() - && node.GetChild(0)->GetContent() == "PgSetItem"; - if (!isPgSetItem) { - return; - } - const auto pgSetItemOptions = node.GetChild(1)->GetChild(1); - - for (const auto* pgOption : pgSetItemOptions->GetChildren()) { - const bool isQuotedList = - pgOption->IsListOfSize(2) && pgOption->GetChild(0)->IsAtom() - && pgOption->GetChild(0)->GetContent() == "quote"; - if (!isQuotedList) { - return; - } - - const auto* option = pgOption->GetChild(1); - const auto& optionName = option->GetChild(0); - - const bool isValuesNode = - optionName->IsListOfSize(2) && optionName->GetChild(0)->IsAtom() - && optionName->GetChild(0)->GetContent() == "quote" - && optionName->GetChild(1)->GetContent() == "values"; - if (!isValuesNode) { - return; - } - const auto values = option->GetChild(2); - if (values->IsAtom()) { - usedParams.insert(TString(values->GetContent())); - } - } - }); + VisitAstNodes(*res.Root, [&usedParams, &usedParamsGetter] (const auto& node) { return usedParamsGetter(usedParams, node); }); UNIT_ASSERT_VALUES_EQUAL(declaredParams, usedParams); TMap<TString, Ydb::TypedValue> expectedParams; @@ -109,25 +118,27 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { UNIT_ASSERT_VALUES_EQUAL(declaredParams.size(), expectedParams.size()); } - Y_UNIT_TEST(AutoParamStmt_Int4) { + Y_UNIT_TEST(AutoParamValues_Int4) { TString query = R"(insert into plato.Output values (1,2), (3,4), (4,5))"; TString expectedParamJson = R"( {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":23}}]}}}}, "value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]},{"items":[{"text_value":"4"},{"text_value":"5"}]}]}} )"; - TestAutoParam(query, {{"a0", expectedParamJson}}); + TString type = "(ListType (TupleType (PgType 'int4) (PgType 'int4)))"; + TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues); } - Y_UNIT_TEST(AutoParamStmt_Int4Text) { + Y_UNIT_TEST(AutoParamValues_Int4Text) { TString query = R"(insert into plato.Output values (1,'2'), (3,'4'))"; TString expectedParamJson = R"( {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":705}}]}}}}, "value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}} )"; - TestAutoParam(query, {{"a0", expectedParamJson}}); + TString type = "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))"; + TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues); } - Y_UNIT_TEST(AutoParamStmt_MultipleStmts) { + Y_UNIT_TEST(AutoParamValues_MultipleStmts) { TString query = R"( insert into plato.Output values (1,'2'), (3,'4'); insert into plato.Output1 values (1.23); @@ -140,10 +151,14 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":1700}}]}}}}, "value":{"items":[{"items":[{"text_value":"1.23"}]}]}} )"; - TestAutoParam(query, {{"a0", expectedParamJson0}, {"a1", expectedParamJson1}}); + TMap<TString, TString> expectedParamTypes { + {"a0", "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))"}, + {"a1", "(ListType (TupleType (PgType 'numeric)))"} + }; + TestAutoParam(query, {{"a0", expectedParamJson0}, {"a1", expectedParamJson1}}, expectedParamTypes, GetUsedParamsInValues); } - Y_UNIT_TEST(AutoParamStmt_WithNull) { + Y_UNIT_TEST(AutoParamValues_WithNull) { TString query = R"( insert into plato.Output values (null, '2'), (3, '4') )"; @@ -151,10 +166,11 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":705}}]}}}}, "value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}} )"; - TestAutoParam(query, {{"a0", expectedParamJson}}); + TString type = "(ListType (TupleType (PgType 'int4) (PgType 'unknown)))"; + TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues); } - Y_UNIT_TEST(AutoParamStmt_NullCol) { + Y_UNIT_TEST(AutoParamValues_NullCol) { TString query = R"( insert into plato.Output values (null,1), (null,1) )"; @@ -162,6 +178,72 @@ Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":705}},{"pg_type":{"oid":23}}]}}}}, "value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]},{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]}]}} )"; - TestAutoParam(query, {{"a0", expectedParamJson}}); + TString type = "(ListType (TupleType (PgType 'unknown) (PgType 'int4)))"; + TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, GetUsedParamsInValues); + } + + Y_UNIT_TEST(AutoParamConsts_Where) { + TString query = R"( + select * from plato.Output where key > 1 + )"; + TString expectedParamJson = R"( + {"type":{"pg_type": {"oid": 23}}, + "value":{"text_value": "1"}} + )"; + THashSet<TString> enabledScopes {"WHERE"}; + + // We expect: (PgOp '">" (PgColumnRef '"key") a0) + const TUsedParamsGetter usedInWhereComp = [] (TSet<TString>& usedParams, const NYql::TAstNode& node) { + const auto maybeQuote = MaybeGetQuotedValue(node); + if (!maybeQuote) { + return; + } + const auto quotedVal = maybeQuote.GetRef(); + const bool isWhere = + quotedVal->IsListOfSize(2) && quotedVal->GetChild(1)->IsListOfSize(3) + && quotedVal->GetChild(1)->IsListOfSize(3) + && quotedVal->GetChild(1)->GetChild(0)->IsAtom() + && quotedVal->GetChild(1)->GetChild(0)->GetContent() == "PgWhere"; + + if (!isWhere) { + return; + } + const auto* whereCallable = quotedVal->GetChild(1); + + const auto* whereLambda = whereCallable->GetChild(2); + const auto* pgOp = whereLambda->GetChild(2); + const bool isBinaryOp = pgOp->IsListOfSize(4); + if (!isBinaryOp) { + return; + } + const auto* pgBinOpSecondArg = pgOp->GetChild(3); + usedParams.insert(TString(pgBinOpSecondArg->GetContent())); + }; + TString type = "(PgType 'int4)"; + TestAutoParam(query, {{"a0", expectedParamJson}}, {{"a0", type}}, usedInWhereComp, enabledScopes); } + + Y_UNIT_TEST(AutoParamConsts_Select) { + TString query = R"( + select 1, 'test' + )"; + TString expectedParamJsonInt4 = R"( + {"type":{"pg_type": {"oid": 23}}, + "value":{"text_value": "1"}} + )"; + TString expectedParamJsonText = R"( + {"type":{"pg_type": {"oid": 705}}, + "value":{"text_value": "test"}} + )"; + THashSet<TString> enabledScopes {"SELECT"}; + const TUsedParamsGetter dummyGetter = [] (TSet<TString>& usedParams, const NYql::TAstNode&) { + usedParams = {"a0", "a1"}; + }; + TMap<TString, TString> expectedParamTypes { + {"a0", "(PgType 'int4)"}, + {"a1", "(PgType 'unknown)"}, + }; + TestAutoParam(query, {{"a0", expectedParamJsonInt4}, {"a1", expectedParamJsonText}}, expectedParamTypes, dummyGetter, enabledScopes); + } + } diff --git a/ydb/library/yql/sql/pg/ut/util.h b/ydb/library/yql/sql/pg/ut/util.h index ace07033a63..19f636483c1 100644 --- a/ydb/library/yql/sql/pg/ut/util.h +++ b/ydb/library/yql/sql/pg/ut/util.h @@ -59,3 +59,14 @@ inline void VisitAstNodes(const NYql::TAstNode& root, const TAstNodeVisitFunc& v VisitAstNodes(*root.GetChild(childIdx), visitFunc); } } + + +inline TMaybe<const NYql::TAstNode*> MaybeGetQuotedValue(const NYql::TAstNode& node) { + const bool isQuotedList = + node.IsListOfSize(2) && node.GetChild(0)->IsAtom() + && node.GetChild(0)->GetContent() == "quote"; + if (isQuotedList) { + return node.GetChild(1); + } + return {}; +} diff --git a/ydb/library/yql/sql/settings/translation_settings.h b/ydb/library/yql/sql/settings/translation_settings.h index 7029acfa299..76f626e0c92 100644 --- a/ydb/library/yql/sql/settings/translation_settings.h +++ b/ydb/library/yql/sql/settings/translation_settings.h @@ -107,6 +107,7 @@ namespace NSQLTranslation { TVector<ui32> PgParameterTypeOids; bool AutoParametrizeEnabled = false; + THashSet<TString> AutoParametrizeEnabledScopes = {}; }; bool ParseTranslationSettings(const TString& query, NSQLTranslation::TTranslationSettings& settings, NYql::TIssues& issues); |