diff options
author | fedor-miron <fedor-miron@yandex-team.com> | 2023-08-28 11:30:32 +0300 |
---|---|---|
committer | fedor-miron <fedor-miron@yandex-team.com> | 2023-08-28 11:44:39 +0300 |
commit | 8deda63a4dadb4d18d6e0f4d0dfcb1de03140ab0 (patch) | |
tree | 299afe5f458ac873e496377a79c23509257b611f | |
parent | 1a01c1b3371f79b0ab44e91d1c0af4763b8f6c80 (diff) | |
download | ydb-8deda63a4dadb4d18d6e0f4d0dfcb1de03140ab0.tar.gz |
YQL-16258: automatically parametrize constants in VALUES stmt
21 files changed, 563 insertions, 133 deletions
diff --git a/ydb/library/yql/ast/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/ast/CMakeLists.darwin-x86_64.txt index b486ffda55..702a50bcc6 100644 --- a/ydb/library/yql/ast/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/ast/CMakeLists.darwin-x86_64.txt @@ -27,6 +27,7 @@ target_link_libraries(library-yql-ast PUBLIC yql-core-issue core-url_lister-interface yql-parser-pg_catalog + api-protos ) target_sources(library-yql-ast PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/ast/yql_ast.cpp diff --git a/ydb/library/yql/ast/CMakeLists.linux-aarch64.txt b/ydb/library/yql/ast/CMakeLists.linux-aarch64.txt index 6a823d859e..47978239de 100644 --- a/ydb/library/yql/ast/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/ast/CMakeLists.linux-aarch64.txt @@ -28,6 +28,7 @@ target_link_libraries(library-yql-ast PUBLIC yql-core-issue core-url_lister-interface yql-parser-pg_catalog + api-protos ) target_sources(library-yql-ast PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/ast/yql_ast.cpp diff --git a/ydb/library/yql/ast/CMakeLists.linux-x86_64.txt b/ydb/library/yql/ast/CMakeLists.linux-x86_64.txt index 6a823d859e..47978239de 100644 --- a/ydb/library/yql/ast/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/ast/CMakeLists.linux-x86_64.txt @@ -28,6 +28,7 @@ target_link_libraries(library-yql-ast PUBLIC yql-core-issue core-url_lister-interface yql-parser-pg_catalog + api-protos ) target_sources(library-yql-ast PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/ast/yql_ast.cpp diff --git a/ydb/library/yql/ast/CMakeLists.windows-x86_64.txt b/ydb/library/yql/ast/CMakeLists.windows-x86_64.txt index b486ffda55..702a50bcc6 100644 --- a/ydb/library/yql/ast/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/ast/CMakeLists.windows-x86_64.txt @@ -27,6 +27,7 @@ target_link_libraries(library-yql-ast PUBLIC yql-core-issue core-url_lister-interface yql-parser-pg_catalog + api-protos ) target_sources(library-yql-ast PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/ast/yql_ast.cpp diff --git a/ydb/library/yql/ast/ya.make b/ydb/library/yql/ast/ya.make index ea0cd2fd3e..9fed20bea3 100644 --- a/ydb/library/yql/ast/ya.make +++ b/ydb/library/yql/ast/ya.make @@ -38,6 +38,7 @@ PEERDIR( ydb/library/yql/core/issue ydb/library/yql/core/url_lister/interface ydb/library/yql/parser/pg_catalog + ydb/public/api/protos ) END() diff --git a/ydb/library/yql/ast/yql_ast.h b/ydb/library/yql/ast/yql_ast.h index 5f9312f560..40866252a4 100644 --- a/ydb/library/yql/ast/yql_ast.h +++ b/ydb/library/yql/ast/yql_ast.h @@ -1,5 +1,6 @@ #pragma once +#include "ydb/public/api/protos/ydb_value.pb.h" #include "yql_errors.h" #include <library/cpp/deprecated/enum_codegen/enum_codegen.h> @@ -129,7 +130,7 @@ struct TAstNode { } } - inline TArrayRef<TAstNode*> GetChildren() { + inline TArrayRef<TAstNode* const> GetChildren() const { Y_VERIFY(IsList()); return {ListCount <= SmallListCount ? Data.S.Children : Data.L.Children, ListCount}; } @@ -244,6 +245,7 @@ struct TAstParseResult { std::unique_ptr<TMemoryPool> Pool; TAstNode* Root = nullptr; TIssues Issues; + TMaybe<THashMap<TString, Ydb::TypedValue>> PgAutoParamValues = Nothing(); inline bool IsOk() const { return !!Root; diff --git a/ydb/library/yql/sql/pg/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/sql/pg/CMakeLists.darwin-x86_64.txt index 84681e1679..709a613678 100644 --- a/ydb/library/yql/sql/pg/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/sql/pg/CMakeLists.darwin-x86_64.txt @@ -25,6 +25,7 @@ target_link_libraries(yql-sql-pg PUBLIC library-yql-core yql-parser-pg_catalog yql-sql-settings + api-protos ) target_sources(yql-sql-pg PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql.cpp diff --git a/ydb/library/yql/sql/pg/CMakeLists.linux-aarch64.txt b/ydb/library/yql/sql/pg/CMakeLists.linux-aarch64.txt index 73f54b66fa..a158724b0f 100644 --- a/ydb/library/yql/sql/pg/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/sql/pg/CMakeLists.linux-aarch64.txt @@ -26,6 +26,7 @@ target_link_libraries(yql-sql-pg PUBLIC library-yql-core yql-parser-pg_catalog yql-sql-settings + api-protos ) target_sources(yql-sql-pg PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql.cpp diff --git a/ydb/library/yql/sql/pg/CMakeLists.linux-x86_64.txt b/ydb/library/yql/sql/pg/CMakeLists.linux-x86_64.txt index 73f54b66fa..a158724b0f 100644 --- a/ydb/library/yql/sql/pg/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/sql/pg/CMakeLists.linux-x86_64.txt @@ -26,6 +26,7 @@ target_link_libraries(yql-sql-pg PUBLIC library-yql-core yql-parser-pg_catalog yql-sql-settings + api-protos ) target_sources(yql-sql-pg PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql.cpp diff --git a/ydb/library/yql/sql/pg/CMakeLists.windows-x86_64.txt b/ydb/library/yql/sql/pg/CMakeLists.windows-x86_64.txt index 15dfee3761..25a4744632 100644 --- a/ydb/library/yql/sql/pg/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/sql/pg/CMakeLists.windows-x86_64.txt @@ -28,6 +28,7 @@ target_link_libraries(yql-sql-pg PUBLIC library-yql-core yql-parser-pg_catalog yql-sql-settings + api-protos ) target_sources(yql-sql-pg PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql.cpp diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index f14f6cb3ff..e92b34b676 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -1,5 +1,6 @@ #include "util/charset/utf8.h" #include "utils.h" +#include "ydb/public/api/protos/ydb_value.pb.h" #include <ydb/library/yql/sql/settings/partitioning.h> #include <ydb/library/yql/parser/pg_wrapper/interface/config.h> #include <ydb/library/yql/parser/pg_wrapper/interface/parser.h> @@ -11,6 +12,8 @@ #include <ydb/library/yql/core/issue/yql_issue.h> #include <ydb/library/yql/core/yql_callable_names.h> #include <ydb/library/yql/parser/pg_catalog/catalog.h> +#include <ydb/library/yql/utils/log/log_level.h> +#include <ydb/library/yql/utils/log/log.h> #include <util/string/builder.h> #include <util/string/cast.h> #include <util/generic/scope.h> @@ -33,9 +36,18 @@ extern "C" { #undef Max #undef TypeName #undef SortBy + +#undef TRACE +#undef INFO +#undef WARNING +#undef ERROR +#undef FATAL +#undef NOTICE } constexpr auto PREPARED_PARAM_PREFIX = "$p"; +constexpr auto AUTO_PARAM_PREFIX = "a"; +constexpr auto DEFAULT_PARAM_TYPE = "text"; namespace NSQLTranslationPG { @@ -196,6 +208,34 @@ public: TVector<TString> ColNames; TAstNode* Source = nullptr; }; + + struct TPgConst { + TString value; + enum class Type { + int4, + int8, + numeric, + text, + unknown + }; + + static TString ToString(const TPgConst::Type& type) { + switch (type) { + case TPgConst::Type::int4: + return "int4"; + case TPgConst::Type::int8: + return "int8"; + case TPgConst::Type::numeric: + return "numeric"; + case TPgConst::Type::text: + return "text"; + case TPgConst::Type::unknown: + return "unknown"; + } + } + + Type type; + }; using TViews = THashMap<TString, TView>; @@ -225,14 +265,20 @@ public: } for (size_t i = 0; i < Settings.PgParameterTypeOids.size(); ++i) { - auto paramName = PREPARED_PARAM_PREFIX + ToString(i + 1); - ParamNameToTypeOid[paramName] = Settings.PgParameterTypeOids[i]; + const auto paramName = PREPARED_PARAM_PREFIX + ToString(i + 1); + const auto typeOid = Settings.PgParameterTypeOids[i]; + const auto& typeName = + typeOid != UNKNOWNOID ? NPg::LookupType(typeOid).Name : "text"; + ParamNameToPgTypeName[paramName] = typeName; } } void OnResult(const List* raw) { AstParseResult.Pool = std::make_unique<TMemoryPool>(4096); AstParseResult.Root = ParseResult(raw); + if (!AutoParamValues.empty()) { + AstParseResult.PgAutoParamValues = std::move(AutoParamValues); + } } void OnError(const TIssue& issue) { @@ -343,6 +389,204 @@ public: } } + [[nodiscard]] + static Ydb::TypedValue MakeYdbListTupleParamValue(TVector<Ydb::Value>&& autoParamLiterals, TVector<TPgConst::Type>&& columnTypes) { + Ydb::TypedValue listOfTuples; + + auto* tupleType = listOfTuples.mutable_type()->mutable_list_type()->mutable_item()->mutable_tuple_type(); + for (auto&& colTypeStr : columnTypes) { + auto* pgColType = tupleType->add_elements()->mutable_pg_type(); + pgColType->set_oid(NPg::LookupType(TPgConst::ToString(colTypeStr)).TypeId); + } + + auto* tuples = listOfTuples.mutable_value()->mutable_items(); + size_t idx = 0; + size_t cols = columnTypes.size(); + for (size_t idx = 0; idx < autoParamLiterals.size(); idx += cols){ + auto* tuple = tuples->Add(); + auto next_row_items_from = autoParamLiterals.begin() + idx; + auto next_row_items_to = next_row_items_from + cols; + *tuple->mutable_items() = { + std::make_move_iterator(next_row_items_from), + std::make_move_iterator(next_row_items_to) + }; + } + + return listOfTuples; + } + + [[nodiscard]] + bool ExtractPgConstsForAutoParam(List* rawValuesLists, TVector<TPgConst>& pgConsts) { + YQL_LOG_CTX_SCOPE(TStringBuf("PgSql Autoparametrize"), __FUNCTION__); + Y_VERIFY(rawValuesLists); + size_t rows = ListLength(rawValuesLists); + + if (rows == 0 || !Settings.AutoParametrizeEnabled) { + return false; + } + + size_t cols = ListLength(CAST_NODE(List, ListNodeNth(rawValuesLists, 0))); + pgConsts.reserve(rows * cols); + + for (int rowIdx = 0; rowIdx < ListLength(rawValuesLists); ++rowIdx) { + const auto rawRow = CAST_NODE(List, ListNodeNth(rawValuesLists, rowIdx)); + + for (int colIdx = 0; colIdx < ListLength(rawRow); ++colIdx) { + const auto rawCell = ListNodeNth(rawRow, colIdx); + if (NodeTag(rawCell) != T_A_Const) { + YQL_CLOG(INFO, Default) << "Auto parametrization of " << NodeTag(rawCell) << " is not supported"; + return false; + } + auto pgConst = GetValueNType(CAST_NODE(A_Const, rawCell)); + if (!pgConst) { + return false; + } + pgConsts.push_back(std::move(pgConst.GetRef())); + } + } + return true; + } + + TMaybe<TVector<TPgConst::Type>> InferColumnTypesForValuesStmt(const TVector<TPgConst>& values, size_t cols) { + Y_VERIFY((values.size() % cols == 0), "wrong amount of columns for auto param values vector"); + TVector<TMaybe<TPgConst::Type>> maybeColumnTypes(cols); + + for (size_t i = 0; i < values.size(); ++i) { + const auto& value = values[i]; + size_t col = i % cols; + auto& columnType = maybeColumnTypes[col]; + + if (!columnType || columnType.GetRef() == TPgConst::Type::unknown) { + columnType = value.type; + } + + if (columnType.GetRef() != value.type && value.type != TPgConst::Type::unknown) { + YQL_CLOG(INFO, Default) + << "Failed to auto parametrize: different types: " + << TPgConst::ToString(columnType.GetRef()) << " and " << TPgConst::ToString(value.type) + << " in col " << col; + return {}; + } + } + + TVector<TPgConst::Type> columnTypes; + for (auto& maybeColumnType: maybeColumnTypes) { + if (maybeColumnType.Empty()) { + YQL_CLOG(INFO, Default) << "Failed to auto parametrize: can't infer PgType for column"; + return {}; + } + columnTypes.emplace_back(maybeColumnType.GetRef()); + } + return columnTypes; + } + + TAstNode* MakeValuesStmtAutoParam(TVector<TPgConst>&& values, TVector<TPgConst::Type>&& columnTypes) { + TVector<Ydb::Value> ydbValues; + for (auto&& pgConst : values) { + Ydb::Value literal; + + // Assuming context of VALUES(...), + // otherwise we have to PgCast (Null) to unknown! + if (pgConst.type == TPgConst::Type::unknown) { + literal.set_null_flag_value(NProtoBuf::NULL_VALUE); + } else { + literal.set_text_value(std::move(pgConst.value)); + } + ydbValues.push_back(literal); + } + + TVector<TAstNode*> autoParamTupleType; + autoParamTupleType.reserve(columnTypes.size()); + autoParamTupleType.push_back(A("TupleType")); + + const auto paramName = TString(AUTO_PARAM_PREFIX) + ToString(AutoParamValues.size()); + AutoParamValues[paramName] = MakeYdbListTupleParamValue(std::move(ydbValues), std::move(columnTypes)); + + Statements.push_back(L(A("declare"), A(paramName), L(A("ListType"), VL(autoParamTupleType)))); + + YQL_CLOG(INFO, Default) << "Successfully autoparametrized VALUES at" << Positions.back(); + + return A(paramName); + } + + [[nodiscard]] + TAstNode* ParseValuesList(List* valuesLists) { + TVector<TAstNode*> valNames; + uint64 colIdx = 0; + + TExprSettings settings; + settings.AllowColumns = false; + settings.Scope = "VALUES"; + + for (int valueIndex = 0; valueIndex < ListLength(valuesLists); ++valueIndex) { + auto node = ListNodeNth(valuesLists, valueIndex); + if (NodeTag(node) != T_List) { + NodeNotImplemented(node); + return nullptr; + } + + auto lst = CAST_NODE(List, node); + if (valueIndex == 0) { + for (int item = 0; item < ListLength(lst); ++item) { + valNames.push_back(QA("column" + ToString(colIdx++))); + } + } else { + if (ListLength(lst) != (int)valNames.size()) { + AddError("VALUES lists must all be the same length"); + return nullptr; + } + } + } + + const auto buildValuesTupleList = [this] (TVector<TVector<TAstNode*>>& values) { + TVector<TAstNode*> valueRows; + valueRows.reserve(values.size() + 1); + valueRows.push_back(A("AsList")); + + for (auto& row: values) { + valueRows.push_back(QVL(row.data(), row.size())); + } + return VL(valueRows); + }; + + TVector<TPgConst> pgConsts; + bool allValsAreLiteral = ExtractPgConstsForAutoParam(valuesLists, pgConsts); + if (allValsAreLiteral) { + auto maybeColumnTypes = InferColumnTypesForValuesStmt(pgConsts, valNames.size()); + if (maybeColumnTypes) { + auto valuesNode = MakeValuesStmtAutoParam(std::move(pgConsts), std::move(maybeColumnTypes.GetRef())); + return QL(QA("values"), QVL(valNames.data(), valNames.size()), valuesNode); + } + } + + TVector<TAstNode*> valueRows; + valueRows.reserve(ListLength(valuesLists)); + valueRows.push_back(A("AsList")); + for (int valueIndex = 0; valueIndex < ListLength(valuesLists); ++valueIndex) { + auto node = ListNodeNth(valuesLists, valueIndex); + if (NodeTag(node) != T_List) { + NodeNotImplemented(node); + return nullptr; + } + + auto lst = CAST_NODE(List, node); + TVector<TAstNode*> row; + + for (int item = 0; item < ListLength(lst); ++item) { + auto cell = ParseExpr(ListNodeNth(lst, item), settings); + if (!cell) { + return nullptr; + } + + row.push_back(cell); + } + + valueRows.push_back(QVL(row.data(), row.size())); + } + + return QL(QA("values"), QVL(valNames.data(), valNames.size()), VL(valueRows)); + } + using TTraverseSelectStack = TStack<std::pair<const SelectStmt*, bool>>; using TTraverseNodeStack = TStack<std::pair<const Node*, bool>>; @@ -708,47 +952,6 @@ public: res.push_back(CreatePgResultItem(r, x, i)); } - TVector<TAstNode*> val; - TVector<TAstNode*> valNames; - val.push_back(A("AsList")); - - for (int valueIndex = 0; valueIndex < ListLength(x->valuesLists); ++valueIndex) { - TExprSettings settings; - settings.AllowColumns = false; - settings.Scope = "VALUES"; - - auto node = ListNodeNth(x->valuesLists, valueIndex); - if (NodeTag(node) != T_List) { - NodeNotImplemented(x, node); - return nullptr; - } - - auto lst = CAST_NODE(List, node); - TVector<TAstNode*> row; - if (valueIndex == 0) { - for (int item = 0; item < ListLength(lst); ++item) { - valNames.push_back(QA("column" + ToString(i++))); - } - } else { - if (ListLength(lst) != (int)valNames.size()) { - AddError("SelectStmt: VALUES lists must all be the same length"); - return nullptr; - } - } - - for (int item = 0; item < ListLength(lst); ++item) { - auto cell = ParseExpr(ListNodeNth(lst, item), settings); - if (!cell) { - return nullptr; - } - - row.push_back(cell); - } - - val.push_back(QVL(row.data(), row.size())); - } - - TVector<TAstNode*> setItemOptions; if (emitPgStar) { setItemOptions.push_back(QL(QA("emit_pg_star"))); @@ -759,7 +962,7 @@ public: if (ListLength(x->targetList) > 0) { setItemOptions.push_back(QL(QA("result"), QVL(res.data(), res.size()))); } else { - setItemOptions.push_back(QL(QA("values"), QVL(valNames.data(), valNames.size()), VL(val.data(), val.size()))); + setItemOptions.push_back(ParseValuesList(x->valuesLists)); } if (!fromList.empty()) { @@ -2294,8 +2497,8 @@ public: TAstNode* ParseParamRefExpr(const ParamRef* value) { const auto varName = PREPARED_PARAM_PREFIX + ToString(value->number); - if (!ParamNameToTypeOid.contains(varName)) { - ParamNameToTypeOid[varName] = UNKNOWNOID; + if (!ParamNameToPgTypeName.contains(varName)) { + ParamNameToPgTypeName[varName] = DEFAULT_PARAM_TYPE; } return A(varName); } @@ -2382,30 +2585,65 @@ public: return nullptr; } } + + TMaybe<TPgConst> GetValueNType(const A_Const* value) { + TPgConst pgConst; + const auto& val = value->val; + switch (NodeTag(val)) { + case T_Integer: { + pgConst.value = ToString(IntVal(val)); + pgConst.type = TPgConst::Type::int4; + return pgConst; + } + case T_Float: { + auto s = StrFloatVal(val); + i64 v; + const bool isInt8 = TryFromString<i64>(s, v); + pgConst.value = ToString(s); + pgConst.type = isInt8 ? TPgConst::Type::int8 : TPgConst::Type::numeric; + return pgConst; + } + case T_String: { + pgConst.value = ToString(StrVal(val)); + pgConst.type = TPgConst::Type::text; + return pgConst; + } + case T_Null: { + pgConst.type = TPgConst::Type::unknown; + return pgConst; + } + default: { + ValueNotImplemented(value, val); + return {}; + } + } + } TAstNode* ParseAConst(const A_Const* value) { AT_LOCATION(value); const auto& val = value->val; - switch (NodeTag(val)) { - case T_Integer: { - return L(A("PgConst"), QA(ToString(IntVal(val))), L(A("PgType"), QA("int4"))); - } - case T_Float: { - auto s = StrFloatVal(val); - i64 v; - const bool isInt8 = TryFromString<i64>(s, v); - return L(A("PgConst"), QA(ToString(s)), L(A("PgType"), isInt8 ? QA("int8") : QA("numeric"))); - } - case T_String: { - return L(A("PgConst"), QAX(ToString(StrVal(val))), L(A("PgType"), QA("text"))); - } - case T_Null: { - return L(A("PgCast"), L(A("Null")), L(A("PgType"), QA("unknown"))); - } - default: - ValueNotImplemented(value, val); + auto valueNType = GetValueNType(value); + if (!valueNType) { return nullptr; } + switch (NodeTag(val)) { + case T_Integer: { + return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + } + case T_Float: { + return L(A("PgConst"), QA(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + } + case T_String: { + return L(A("PgConst"), QAX(valueNType->value), L(A("PgType"), QA(TPgConst::ToString(valueNType->type)))); + } + case T_Null: { + return L(A("PgCast"), L(A("Null")), L(A("PgType"), QA("unknown"))); + } + default: { + ValueNotImplemented(value, val); + return nullptr; + } + } } TAstNode* ParseAArrayExpr(const A_ArrayExpr* value, const TExprSettings& settings) { @@ -3465,9 +3703,7 @@ public: } void AddVariableDeclarations() { - for (const auto &[varName, typeOid] : ParamNameToTypeOid) { - const auto &typeName = - typeOid != UNKNOWNOID ? NPg::LookupType(typeOid).Name : "text"; + for (const auto& [varName, typeName] : ParamNameToPgTypeName) { const auto pgType = L(A("PgType"), QA(typeName)); Statements.push_back(L(A("declare"), A(varName), pgType)); } @@ -3511,6 +3747,10 @@ public: return TAstNode::NewList(pos.Row ? pos : Positions.back(), nodes, size, *AstParseResult.Pool); } + TAstNode* VL(TArrayRef<TAstNode*> nodes, TPosition pos = {}) { + return TAstNode::NewList(pos.Row ? pos : Positions.back(), nodes.data(), nodes.size(), *AstParseResult.Pool); + } + TAstNode* QVL(TAstNode** nodes, ui32 size, TPosition pos = {}) { return Q(VL(nodes, size, pos), pos); } @@ -3518,8 +3758,12 @@ public: TAstNode* QVL(TAstNode* node, TPosition pos = {}) { return QVL(&node, 1, pos); } + + TAstNode* QVL(TArrayRef<TAstNode*> nodes, TPosition pos = {}) { + return Q(VL(nodes, pos), pos); + } - TAstNode* A(const TString& str, TPosition pos = {}, ui32 flags = 0) { + TAstNode* A(const TStringBuf str, TPosition pos = {}, ui32 flags = 0) { return TAstNode::NewAtom(pos.Row ? pos : Positions.back(), str, *AstParseResult.Pool, flags); } @@ -3531,7 +3775,7 @@ public: return L(A("quote", pos), node, pos); } - TAstNode* QA(const TString& str, TPosition pos = {}, ui32 flags = 0) { + TAstNode* QA(const TStringBuf str, TPosition pos = {}, ui32 flags = 0) { return Q(A(str, pos, flags), pos); } @@ -3678,7 +3922,8 @@ private: TString Provider; static const THashMap<TStringBuf, TString> ProviderToInsertModeMap; - THashMap<TString, Oid> ParamNameToTypeOid; + THashMap<TString, TString> ParamNameToPgTypeName; + THashMap<TString, Ydb::TypedValue> AutoParamValues; }; const THashMap<TStringBuf, TString> TConverter::ProviderToInsertModeMap = { diff --git a/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp new file mode 100644 index 0000000000..83e94bc289 --- /dev/null +++ b/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp @@ -0,0 +1,167 @@ +#include "ut/util.h" + +#include <google/protobuf/util/json_util.h> +#include <google/protobuf/util/message_differencer.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NSQLTranslation; + +Y_UNIT_TEST_SUITE(PgSqlParsingAutoparam) { + Y_UNIT_TEST(AutoParamStmt_DisabledByDefault) { + auto res = PgSqlToYql("insert into plato.Output values (1,2,3), (1,2,3)"); + UNIT_ASSERT_C(res.Issues.Empty(), "Failed to parse statement, issues: " + res.Issues.ToString()); + UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization"); + } + + Y_UNIT_TEST(AutoParamStmt_DifferentTypes) { + TTranslationSettings settings; + settings.AutoParametrizeEnabled = true; + auto res = SqlToYqlWithMode( + R"(insert into plato.Output values (1,2,3), (1,'2',3))", + NSQLTranslation::ESqlMode::QUERY, + 10, + {}, + EDebugOutput::None, + false, + settings); + + UNIT_ASSERT_C(res.Issues.Empty(), "Failed to parse statement, issues: " + res.Issues.ToString()); + UNIT_ASSERT_C(res.PgAutoParamValues.Empty(), "Expected no auto parametrization"); + } + + void TestAutoParam(const TString& query, const THashMap<TString, TString>& expectedParamNameToJsonYdbVal) { + TTranslationSettings settings; + settings.AutoParametrizeEnabled = true; + auto res = SqlToYqlWithMode( + query, + NSQLTranslation::ESqlMode::QUERY, + 10, + {}, + EDebugOutput::None, + false, + settings); + UNIT_ASSERT_C(res.Issues.Empty(), "Failed to parse statement, issues: " + res.Issues.ToString()); + UNIT_ASSERT_C(res.PgAutoParamValues && !res.PgAutoParamValues->empty(), "Expected auto param values"); + + TSet<TString> declaredParams; + VisitAstNodes(*res.Root, [&declaredParams] (const NYql::TAstNode& node) { + const bool isDeclareNode = + node.IsList() && node.GetChildrenCount() > 0 + && node.GetChild(0)->IsAtom() + && node.GetChild(0)->GetContent() == "declare"; + if (isDeclareNode) { + UNIT_ASSERT_VALUES_EQUAL(node.GetChildrenCount(), 3); + declaredParams.insert(TString(node.GetChild(1)->GetContent())); + } + }); + + TSet<TString> usedParams; + VisitAstNodes(*res.Root, [&usedParams] (const NYql::TAstNode& node) { + const bool isPgSetItem = + node.IsListOfSize(2) && node.GetChild(0)->IsAtom() + && node.GetChild(0)->GetContent() == "PgSetItem"; + if (!isPgSetItem) { + return; + } + const auto pgSetItemOptions = node.GetChild(1)->GetChild(1); + + for (const auto* pgOption : pgSetItemOptions->GetChildren()) { + const bool isQuotedList = + pgOption->IsListOfSize(2) && pgOption->GetChild(0)->IsAtom() + && pgOption->GetChild(0)->GetContent() == "quote"; + if (!isQuotedList) { + return; + } + + const auto* option = pgOption->GetChild(1); + const auto& optionName = option->GetChild(0); + + const bool isValuesNode = + optionName->IsListOfSize(2) && optionName->GetChild(0)->IsAtom() + && optionName->GetChild(0)->GetContent() == "quote" + && optionName->GetChild(1)->GetContent() == "values"; + if (!isValuesNode) { + return; + } + const auto values = option->GetChild(2); + if (values->IsAtom()) { + usedParams.insert(TString(values->GetContent())); + } + } + }); + UNIT_ASSERT_VALUES_EQUAL(declaredParams, usedParams); + + TMap<TString, Ydb::TypedValue> expectedParams; + for (auto& [expectedParamName, jsonValue] : expectedParamNameToJsonYdbVal) { + Ydb::TypedValue expectedParam; + google::protobuf::util::JsonStringToMessage(jsonValue, &expectedParam); + expectedParams.emplace(expectedParamName, expectedParam); + UNIT_ASSERT_C(res.PgAutoParamValues->contains(expectedParamName), + "Autoparametrized values do not contain expected param: " << expectedParamName); + + auto actualParam = res.PgAutoParamValues.GetRef()[expectedParamName]; + UNIT_ASSERT_STRINGS_EQUAL(expectedParam.ShortUtf8DebugString(), actualParam.ShortUtf8DebugString()); + UNIT_ASSERT_C(declaredParams.contains(expectedParamName), + "Declared params don't contain expected param name: " << expectedParamName); + } + + UNIT_ASSERT_VALUES_EQUAL(declaredParams.size(), expectedParams.size()); + } + + Y_UNIT_TEST(AutoParamStmt_Int4) { + TString query = R"(insert into plato.Output values (1,2), (3,4), (4,5))"; + TString expectedParamJson = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":23}}]}}}}, + "value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]},{"items":[{"text_value":"4"},{"text_value":"5"}]}]}} + )"; + TestAutoParam(query, {{"a0", expectedParamJson}}); + } + + Y_UNIT_TEST(AutoParamStmt_Int4Text) { + TString query = R"(insert into plato.Output values (1,'2'), (3,'4'))"; + TString expectedParamJson = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":25}}]}}}}, + "value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}} + )"; + TestAutoParam(query, {{"a0", expectedParamJson}}); + } + + Y_UNIT_TEST(AutoParamStmt_MultipleStmts) { + TString query = R"( + insert into plato.Output values (1,'2'), (3,'4'); + insert into plato.Output1 values (1.23); + )"; + TString expectedParamJson0 = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":25}}]}}}}, + "value":{"items":[{"items":[{"text_value":"1"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}} + )"; + TString expectedParamJson1 = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":1700}}]}}}}, + "value":{"items":[{"items":[{"text_value":"1.23"}]}]}} + )"; + TestAutoParam(query, {{"a0", expectedParamJson0}, {"a1", expectedParamJson1}}); + } + + Y_UNIT_TEST(AutoParamStmt_WithNull) { + TString query = R"( + insert into plato.Output values (null, '2'), (3, '4') + )"; + TString expectedParamJson = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":23}},{"pg_type":{"oid":25}}]}}}}, + "value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"2"}]},{"items":[{"text_value":"3"},{"text_value":"4"}]}]}} + )"; + TestAutoParam(query, {{"a0", expectedParamJson}}); + } + + Y_UNIT_TEST(AutoParamStmt_NullCol) { + TString query = R"( + insert into plato.Output values (null,1), (null,1) + )"; + TString expectedParamJson = R"( + {"type":{"list_type":{"item":{"tuple_type":{"elements":[{"pg_type":{"oid":705}},{"pg_type":{"oid":23}}]}}}}, + "value":{"items":[{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]},{"items":[{"null_flag_value":"NULL_VALUE"},{"text_value":"1"}]}]}} + )"; + TestAutoParam(query, {{"a0", expectedParamJson}}); + } +} diff --git a/ydb/library/yql/sql/pg/pg_sql_ut.cpp b/ydb/library/yql/sql/pg/pg_sql_ut.cpp index 45f49cb005..fd1029bec7 100644 --- a/ydb/library/yql/sql/pg/pg_sql_ut.cpp +++ b/ydb/library/yql/sql/pg/pg_sql_ut.cpp @@ -1,70 +1,9 @@ -#include <contrib/libs/fmt/include/fmt/format.h> - -#include <ydb/library/yql/ast/yql_expr.h> -#include <ydb/library/yql/providers/common/provider/yql_provider_names.h> -#include <ydb/library/yql/sql/sql.h> -#include <ydb/library/yql/parser/pg_catalog/catalog.h> -#include <ydb/library/yql/parser/pg_wrapper/interface/config.h> +#include "ut/util.h" #include <library/cpp/testing/unittest/registar.h> - using namespace NSQLTranslation; -enum class EDebugOutput { - None, - ToCerr, -}; - -TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) { - TStringStream s; - res.Issues.PrintTo(s); - - if (debug == EDebugOutput::ToCerr) { - Cerr << s.Str() << Endl; - } - return s.Str(); -} - -NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, - EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {}) -{ - google::protobuf::Arena arena; - const auto service = provider ? provider : TString(NYql::YtProviderName); - const TString cluster = "plato"; - settings.ClusterMapping[cluster] = service; - settings.ClusterMapping["hahn"] = NYql::YtProviderName; - settings.ClusterMapping["mon"] = NYql::SolomonProviderName; - settings.ClusterMapping[""] = NYql::KikimrProviderName; - settings.MaxErrors = maxErrors; - settings.Mode = mode; - settings.Arena = &arena; - settings.AnsiLexer = ansiLexer; - settings.SyntaxVersion = 1; - settings.PgParser = true; - auto res = SqlToYql(query, settings); - if (debug == EDebugOutput::ToCerr) { - Err2Str(res, debug); - } - return res; -} - -NYql::TAstParseResult PgSqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { - return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug); -} - -using TAstNodeVisitFunc = std::function<void(const NYql::TAstNode& root)>; - -void VisitAstNodes(const NYql::TAstNode& root, const TAstNodeVisitFunc& visitFunc) { - visitFunc(root); - if (!root.IsList()) { - return; - } - for (size_t childIdx = 0; childIdx < root.GetChildrenCount(); ++childIdx) { - VisitAstNodes(*root.GetChild(childIdx), visitFunc); - } -} - Y_UNIT_TEST_SUITE(PgSqlParsingOnly) { Y_UNIT_TEST(InsertStmt) { auto res = PgSqlToYql("INSERT INTO plato.Input VALUES (1, 1)"); diff --git a/ydb/library/yql/sql/pg/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/sql/pg/ut/CMakeLists.darwin-x86_64.txt index 1b2f7362e8..a0996ad8f0 100644 --- a/ydb/library/yql/sql/pg/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/sql/pg/ut/CMakeLists.darwin-x86_64.txt @@ -35,6 +35,7 @@ target_link_options(ydb-library-yql-sql-pg-ut PRIVATE ) target_sources(ydb-library-yql-sql-pg-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_impl_ut.cpp ) diff --git a/ydb/library/yql/sql/pg/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/sql/pg/ut/CMakeLists.linux-aarch64.txt index c7bd854615..578b349641 100644 --- a/ydb/library/yql/sql/pg/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/sql/pg/ut/CMakeLists.linux-aarch64.txt @@ -38,6 +38,7 @@ target_link_options(ydb-library-yql-sql-pg-ut PRIVATE ) target_sources(ydb-library-yql-sql-pg-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_impl_ut.cpp ) diff --git a/ydb/library/yql/sql/pg/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/sql/pg/ut/CMakeLists.linux-x86_64.txt index da833e07fb..98c6e63806 100644 --- a/ydb/library/yql/sql/pg/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/sql/pg/ut/CMakeLists.linux-x86_64.txt @@ -39,6 +39,7 @@ target_link_options(ydb-library-yql-sql-pg-ut PRIVATE ) target_sources(ydb-library-yql-sql-pg-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_impl_ut.cpp ) diff --git a/ydb/library/yql/sql/pg/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/sql/pg/ut/CMakeLists.windows-x86_64.txt index 257706149c..1f3abeea56 100644 --- a/ydb/library/yql/sql/pg/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/sql/pg/ut/CMakeLists.windows-x86_64.txt @@ -28,6 +28,7 @@ target_link_libraries(ydb-library-yql-sql-pg-ut PUBLIC ) target_sources(ydb-library-yql-sql-pg-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/pg_sql_autoparam_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/pg/optimizer_impl_ut.cpp ) diff --git a/ydb/library/yql/sql/pg/ut/util.h b/ydb/library/yql/sql/pg/ut/util.h new file mode 100644 index 0000000000..ace07033a6 --- /dev/null +++ b/ydb/library/yql/sql/pg/ut/util.h @@ -0,0 +1,61 @@ +#include <contrib/libs/fmt/include/fmt/format.h> + +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/providers/common/provider/yql_provider_names.h> +#include <ydb/library/yql/sql/sql.h> +#include <ydb/library/yql/parser/pg_catalog/catalog.h> +#include <ydb/library/yql/parser/pg_wrapper/interface/config.h> + +enum class EDebugOutput { + None, + ToCerr, +}; + +inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) { + TStringStream s; + res.Issues.PrintTo(s); + + if (debug == EDebugOutput::ToCerr) { + Cerr << s.Str() << Endl; + } + return s.Str(); +} + +inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, + EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {}) +{ + google::protobuf::Arena arena; + const auto service = provider ? provider : TString(NYql::YtProviderName); + const TString cluster = "plato"; + settings.ClusterMapping[cluster] = service; + settings.ClusterMapping["hahn"] = NYql::YtProviderName; + settings.ClusterMapping["mon"] = NYql::SolomonProviderName; + settings.ClusterMapping[""] = NYql::KikimrProviderName; + settings.MaxErrors = maxErrors; + settings.Mode = mode; + settings.Arena = &arena; + settings.AnsiLexer = ansiLexer; + settings.SyntaxVersion = 1; + settings.PgParser = true; + auto res = SqlToYql(query, settings); + if (debug == EDebugOutput::ToCerr) { + Err2Str(res, debug); + } + return res; +} + +inline NYql::TAstParseResult PgSqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug); +} + +using TAstNodeVisitFunc = std::function<void(const NYql::TAstNode& root)>; + +inline void VisitAstNodes(const NYql::TAstNode& root, const TAstNodeVisitFunc& visitFunc) { + visitFunc(root); + if (!root.IsList()) { + return; + } + for (size_t childIdx = 0; childIdx < root.GetChildrenCount(); ++childIdx) { + VisitAstNodes(*root.GetChild(childIdx), visitFunc); + } +} diff --git a/ydb/library/yql/sql/pg/ut/ya.make b/ydb/library/yql/sql/pg/ut/ya.make index 57e021007a..93d16ba650 100644 --- a/ydb/library/yql/sql/pg/ut/ya.make +++ b/ydb/library/yql/sql/pg/ut/ya.make @@ -2,6 +2,7 @@ UNITTEST_FOR(ydb/library/yql/sql/pg) SRCS( pg_sql_ut.cpp + pg_sql_autoparam_ut.cpp optimizer_ut.cpp optimizer_impl_ut.cpp ) diff --git a/ydb/library/yql/sql/pg/ya.make b/ydb/library/yql/sql/pg/ya.make index 65171dca6e..7ebd6b1582 100644 --- a/ydb/library/yql/sql/pg/ya.make +++ b/ydb/library/yql/sql/pg/ya.make @@ -9,6 +9,7 @@ PEERDIR( ydb/library/yql/core ydb/library/yql/parser/pg_catalog ydb/library/yql/sql/settings + ydb/public/api/protos ) ADDINCL( diff --git a/ydb/library/yql/sql/settings/translation_settings.h b/ydb/library/yql/sql/settings/translation_settings.h index 2ddd6a388a..7029acfa29 100644 --- a/ydb/library/yql/sql/settings/translation_settings.h +++ b/ydb/library/yql/sql/settings/translation_settings.h @@ -106,6 +106,7 @@ namespace NSQLTranslation { TString FileAliasPrefix; TVector<ui32> PgParameterTypeOids; + bool AutoParametrizeEnabled = false; }; bool ParseTranslationSettings(const TString& query, NSQLTranslation::TTranslationSettings& settings, NYql::TIssues& issues); |