diff options
author | Vitaly Stoyan <vvvv@ydb.tech> | 2024-03-28 18:59:33 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-28 18:59:33 +0300 |
commit | 34e4eb7e7e5e35106a556fcb2927fedb70c7b39c (patch) | |
tree | 589164508f7db0bbb8eb39db51f46cfef0d6a9f2 | |
parent | 8114eed7e30d7d91067524d384ae2e4d7b20b334 (diff) | |
download | ydb-34e4eb7e7e5e35106a556fcb2927fedb70c7b39c.tar.gz |
Support of PG syntax in purecalc & purebench (#3230)
13 files changed, 176 insertions, 24 deletions
diff --git a/ydb/library/yql/public/purecalc/common/interface.h b/ydb/library/yql/public/purecalc/common/interface.h index 49bfd8c22a2..b489aeda945 100644 --- a/ydb/library/yql/public/purecalc/common/interface.h +++ b/ydb/library/yql/public/purecalc/common/interface.h @@ -363,7 +363,8 @@ namespace NYql { enum class ETranslationMode { SQL /* "SQL" */, SExpr /* "s-expression" */, - Mkql /* "mkql" */ + Mkql /* "mkql" */, + PG /* PostgreSQL */ }; /** diff --git a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make b/ydb/library/yql/public/purecalc/common/no_llvm/ya.make index 929cbd3b774..46b4d25a9aa 100644 --- a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make +++ b/ydb/library/yql/public/purecalc/common/no_llvm/ya.make @@ -4,6 +4,7 @@ INCLUDE(../ya.make.inc) PEERDIR( ydb/library/yql/providers/yt/codec/codegen/no_llvm + ydb/library/yql/providers/config ydb/library/yql/minikql/computation/no_llvm ydb/library/yql/minikql/invoke_builtins/no_llvm ydb/library/yql/minikql/comp_nodes/no_llvm diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp index 20b7eaa1744..8376b2fa246 100644 --- a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp +++ b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp @@ -52,12 +52,31 @@ namespace { return nullptr; } + if (node->Child(0)->IsCallable(NNodes::TCoCons::CallableName())) { + return node; + } + if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); return nullptr; } return BuildInputFromRead(node->Pos(), node->ChildPtr(0), ctx); + } else if (node->IsCallable(NNodes::TCoLeft::CallableName())) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + + if (!EnsureMinArgsCount(*node, 1, ctx)) { + return nullptr; + } + + if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { + ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); + return nullptr; + } + + return node->Child(0)->HeadPtr(); } return node; diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp index 9ff39d19e95..f29a82ada43 100644 --- a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp +++ b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp @@ -81,6 +81,7 @@ namespace { TString nodeName ) : TTypeAnnotatorBase(typeAnnotationContext) + , TypeAnnotationContext_(typeAnnotationContext) , InputStructs_(inputStructs) , ProcessorMode_(processorMode) , InputNodeName_(std::move(nodeName)) @@ -102,12 +103,19 @@ namespace { YQL_ENSURE(inputIndex < InputStructs_.size()); + auto itemType = InputStructs_[inputIndex]; + TColumnOrder columnOrder; + for (const auto& i : itemType->GetItems()) { + columnOrder.push_back(TString(i->GetName())); + } + if (ProcessorMode_ != EProcessorMode::PullList) { - input->SetTypeAnn(ctx.MakeType<TStreamExprType>(InputStructs_[inputIndex])); + input->SetTypeAnn(ctx.MakeType<TStreamExprType>(itemType)); } else { - input->SetTypeAnn(ctx.MakeType<TListExprType>(InputStructs_[inputIndex])); + input->SetTypeAnn(ctx.MakeType<TListExprType>(itemType)); } + TypeAnnotationContext_->SetColumnOrder(*input, columnOrder, ctx); return TStatus::Ok; } diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.cpp b/ydb/library/yql/public/purecalc/common/worker_factory.cpp index 6a5ddbe37cf..965331b3384 100644 --- a/ydb/library/yql/public/purecalc/common/worker_factory.cpp +++ b/ydb/library/yql/public/purecalc/common/worker_factory.cpp @@ -13,6 +13,8 @@ #include <ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.h> #include <ydb/library/yql/providers/common/schema/expr/yql_expr_schema.h> #include <ydb/library/yql/providers/common/provider/yql_provider.h> +#include <ydb/library/yql/providers/common/provider/yql_provider_names.h> +#include <ydb/library/yql/providers/config/yql_config_provider.h> #include <ydb/library/yql/minikql/mkql_node.h> #include <ydb/library/yql/minikql/mkql_node_serialization.h> #include <ydb/library/yql/minikql/mkql_alloc.h> @@ -92,7 +94,7 @@ TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorM if (options.TranslationMode_ == ETranslationMode::Mkql) { SerializedProgram_ = TString{options.Query}; } else { - ExprRoot_ = Compile(options.Query, ETranslationMode::SQL == options.TranslationMode_, + ExprRoot_ = Compile(options.Query, options.TranslationMode_, options.ModuleResolver, options.SyntaxVersion_, options.Modules, options.OutputSpec, processorMode); // Deduce output type if it wasn't provided by output spec @@ -109,13 +111,17 @@ TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorM template <typename TBase> TExprNode::TPtr TWorkerFactory<TBase>::Compile( TStringBuf query, - bool sql, + ETranslationMode mode, IModuleResolver::TPtr moduleResolver, ui16 syntaxVersion, const THashMap<TString, TString>& modules, const TOutputSpecBase& outputSpec, EProcessorMode processorMode ) { + if (mode == ETranslationMode::PG && processorMode != EProcessorMode::PullList) { + ythrow TCompileError("", "") << "only PullList mode is compatible to PostgreSQL syntax"; + } + // Prepare type annotation context TTypeAnnotationContextPtr typeContext; @@ -128,6 +134,8 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( typeContext->UdfResolver = NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get()); typeContext->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, UserData_, nullptr, nullptr); typeContext->Modules = moduleResolver; + auto configProvider = CreateConfigProvider(*typeContext, nullptr, ""); + typeContext->AddDataSource(ConfigProviderName, configProvider); typeContext->Initialize(ExprContext_); if (auto modules = dynamic_cast<TModuleResolver*>(moduleResolver.get())) { @@ -138,10 +146,14 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( TAstParseResult astRes; - if (sql) { + if (mode == ETranslationMode::SQL || mode == ETranslationMode::PG) { NSQLTranslation::TTranslationSettings settings; typeContext->DeprecatedSQL = (syntaxVersion == 0); + if (mode == ETranslationMode::PG) { + settings.PgParser = true; + } + settings.SyntaxVersion = syntaxVersion; settings.V0Behavior = NSQLTranslation::EV0Behavior::Disable; settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; @@ -164,7 +176,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( } if (!astRes.IsOk()) { - ythrow TCompileError(TString(query), astRes.Issues.ToString()) << "failed to parse " << (sql ? ETranslationMode::SQL : ETranslationMode::SExpr); + ythrow TCompileError(TString(query), astRes.Issues.ToString()) << "failed to parse " << mode; } ExprContext_.IssueManager.AddIssues(astRes.Issues); @@ -250,6 +262,17 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( }, ctx, TOptimizeExprSettings(nullptr)); }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, "Unordered optimizations"); + pipeline.Add(CreateFunctorTransformer( + [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { + if (node->IsCallable("Right!") && node->Head().IsCallable("Cons!")) { + return node->Head().ChildPtr(1); + } + + return node; + }, ctx, TOptimizeExprSettings(nullptr)); + }), "Cons", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Cons optimizations"); pipeline.Add(MakeOutputColumnsFilter(outputSpec.GetOutputColumnsFilter()), "Filter", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, "Filter output columns"); diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.h b/ydb/library/yql/public/purecalc/common/worker_factory.h index 901e20fe88a..bf4f3599a0b 100644 --- a/ydb/library/yql/public/purecalc/common/worker_factory.h +++ b/ydb/library/yql/public/purecalc/common/worker_factory.h @@ -113,7 +113,7 @@ namespace NYql { private: TExprNode::TPtr Compile(TStringBuf query, - bool sql, + ETranslationMode mode, IModuleResolver::TPtr moduleResolver, ui16 syntaxVersion, const THashMap<TString, TString>& modules, diff --git a/ydb/library/yql/public/purecalc/common/ya.make b/ydb/library/yql/public/purecalc/common/ya.make index 855f3daae43..a526f25235b 100644 --- a/ydb/library/yql/public/purecalc/common/ya.make +++ b/ydb/library/yql/public/purecalc/common/ya.make @@ -4,6 +4,7 @@ INCLUDE(ya.make.inc) PEERDIR( ydb/library/yql/providers/yt/codec/codegen + ydb/library/yql/providers/config ydb/library/yql/minikql/computation/llvm14 ydb/library/yql/minikql/invoke_builtins/llvm14 ydb/library/yql/minikql/comp_nodes/llvm14 diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp index 4e45e76bc17..b56f7cfdfd5 100644 --- a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp +++ b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp @@ -2,10 +2,10 @@ namespace NYql { namespace NPureCalc { - NYT::TNode MakeFakeSchema() { + NYT::TNode MakeFakeSchema(bool pg) { auto itemType = NYT::TNode::CreateList(); - itemType.Add("DataType"); - itemType.Add("Int32"); + itemType.Add(pg ? "PgType" : "DataType"); + itemType.Add(pg ? "int4" : "Int32"); auto itemNode = NYT::TNode::CreateList(); itemNode.Add("Name"); @@ -21,15 +21,15 @@ namespace NYql { return schema; } - TFakeInputSpec FakeIS(ui32 inputsNumber) { + TFakeInputSpec FakeIS(ui32 inputsNumber, bool pg) { auto spec = TFakeInputSpec(); - spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema()); + spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema(pg)); return spec; } - TFakeOutputSpec FakeOS() { + TFakeOutputSpec FakeOS(bool pg) { auto spec = TFakeOutputSpec(); - spec.Schema = MakeFakeSchema(); + spec.Schema = MakeFakeSchema(pg); return spec; } } diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.h b/ydb/library/yql/public/purecalc/ut/fake_spec.h index 0b0e9e02ec1..87b4907e5de 100644 --- a/ydb/library/yql/public/purecalc/ut/fake_spec.h +++ b/ydb/library/yql/public/purecalc/ut/fake_spec.h @@ -47,8 +47,8 @@ namespace NYql { using TPullListReturnType = void; }; - NYT::TNode MakeFakeSchema(); - TFakeInputSpec FakeIS(ui32 inputsNumber = 1); - TFakeOutputSpec FakeOS(); + NYT::TNode MakeFakeSchema(bool pg = false); + TFakeInputSpec FakeIS(ui32 inputsNumber = 1, bool pg = false); + TFakeOutputSpec FakeOS(bool pg = false); } } diff --git a/ydb/library/yql/public/purecalc/ut/test_pg.cpp b/ydb/library/yql/public/purecalc/ut/test_pg.cpp new file mode 100644 index 00000000000..d9b21dece19 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_pg.cpp @@ -0,0 +1,71 @@ +#include <ydb/library/yql/public/purecalc/purecalc.h> + +#include "fake_spec.h" + +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestPg) { + using namespace NYql::NPureCalc; + + Y_UNIT_TEST(TestPgCompile) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM "Input"; + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG); + }()); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + } + + Y_UNIT_TEST(TestSqlWrongTableName) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM WrongTable; + )"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "Failed to optimize"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + } + + Y_UNIT_TEST(TestInvalidSql) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + Just some invalid SQL; + )"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "failed to parse PG"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/ya.make b/ydb/library/yql/public/purecalc/ut/ya.make index 5b613a4669b..87f90a25ccf 100644 --- a/ydb/library/yql/public/purecalc/ut/ya.make +++ b/ydb/library/yql/public/purecalc/ut/ya.make @@ -7,6 +7,7 @@ SRCS( test_schema.cpp test_sexpr.cpp test_sql.cpp + test_pg.cpp test_udf.cpp test_user_data.cpp test_eval.cpp diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index a4e1a637be8..1cdeb0506f6 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -294,6 +294,8 @@ public: , StmtParseInfo(stmtParseInfo) , PerStatementResult(perStatementResult) { + Y_ENSURE(settings.Mode == NSQLTranslation::ESqlMode::QUERY || settings.Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW); + Y_ENSURE(settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW || !perStatementResult); State.ApplicationName = Settings.ApplicationName; AstParseResults.push_back({}); if (StmtParseInfo) { @@ -394,14 +396,16 @@ public: return nullptr; } - if (Settings.EndOfQueryCommit) { + if (Settings.EndOfQueryCommit && Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) { State.Statements.push_back(L(A("let"), A("world"), L(A("CommitAll!"), A("world")))); } AddVariableDeclarations(); - State.Statements.push_back(L(A("return"), A("world"))); + if (Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) { + State.Statements.push_back(L(A("return"), A("world"))); + } if (DqEngineEnabled) { State.Statements[dqEnginePgmPos] = L(A("let"), A("world"), L(A(TString(NYql::ConfigureName)), A("world"), configSource, @@ -431,6 +435,12 @@ public: bool ParseRawStmt(const RawStmt* value) { AT_LOCATION_EX(value, stmt_location); auto node = value->stmt; + if (Settings.Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) { + if (NodeTag(node) != T_SelectStmt && NodeTag(node) != T_VariableSetStmt) { + AddError("Unsupported statement in LIMITED_VIEW mode"); + return false; + } + } switch (NodeTag(node)) { case T_SelectStmt: return ParseSelectStmt(CAST_NODE(SelectStmt, node), false) != nullptr; @@ -747,6 +757,15 @@ public: bool fillTargetColumns = false, bool unknownsAllowed = false ) { + if (Settings.Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) { + if (HasSelectInLimitedView) { + AddError("Expected exactly one SELECT in LIMITED_VIEW mode"); + return nullptr; + } + + HasSelectInLimitedView = true; + } + bool isValuesClauseOfInsertStmt = fillTargetColumns; State.CTE.emplace_back(); @@ -1285,6 +1304,11 @@ public: return output; } + if (Settings.Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) { + State.Statements.push_back(L(A("return"), L(A("Right!"), L(A("Cons!"), A("world"), output)))); + return State.Statements.back(); + } + auto resOptions = QL(QL(QA("type")), QL(QA("autoref"))); State.Statements.push_back(L(A("let"), A("output"), output)); State.Statements.push_back(L(A("let"), A("result_sink"), L(A("DataSink"), QA(TString(NYql::ResultProviderName))))); @@ -4689,6 +4713,7 @@ private: ui32 StatementId = 0; TVector<TStmtParseInfo>* StmtParseInfo; bool PerStatementResult; + bool HasSelectInLimitedView = false; }; const THashMap<TStringBuf, TString> TConverter::ProviderToInsertModeMap = { diff --git a/ydb/library/yql/tools/purebench/purebench.cpp b/ydb/library/yql/tools/purebench/purebench.cpp index e7979a48e1f..b601ffdc2be 100644 --- a/ydb/library/yql/tools/purebench/purebench.cpp +++ b/ydb/library/yql/tools/purebench/purebench.cpp @@ -39,6 +39,8 @@ int Main(int argc, const char *argv[]) opts.AddLongOption('t', "test-sql", "SQL query to test").StoreResult(&testSql).DefaultValue("select count(*) as count from Input"); opts.AddLongOption('r', "repeats", "number of iterations").StoreResult(&repeats).DefaultValue(10); opts.AddLongOption('w', "show-results", "show results of test SQL").StoreResult(&showResults).DefaultValue(true); + opts.AddLongOption("pg", "use PG syntax for generate query").NoArgument(); + opts.AddLongOption("pt", "use PG syntax for test query").NoArgument(); opts.AddLongOption("udfs-dir", "directory with UDFs").StoreResult(&udfsDir).DefaultValue(""); opts.AddLongOption("llvm-settings", "LLVM settings").StoreResult(&LLVMSettings).DefaultValue(""); opts.SetFreeArgsMax(0); @@ -52,7 +54,7 @@ int Main(int argc, const char *argv[]) NYT::TNode members{NYT::TNode::CreateList()}; auto typeNode = NYT::TNode::CreateList() .Add("DataType") - .Add("Uint64"); + .Add("Int64"); members.Add(NYT::TNode::CreateList() .Add("index") @@ -67,13 +69,13 @@ int Main(int argc, const char *argv[]) inputSpec1, outputSpec1, genSql, - ETranslationMode::SQL); + res.Has("pg") ? ETranslationMode::PG : ETranslationMode::SQL); TStringStream stream; NSkiff::TUncheckedSkiffWriter writer{&stream}; for (ui64 i = 0; i < count; ++i) { writer.WriteVariant16Tag(0); - writer.WriteUint64(i); + writer.WriteInt64(i); } writer.Finish(); auto input1 = TStringStream(stream); @@ -90,7 +92,7 @@ int Main(int argc, const char *argv[]) inputSpec2, outputSpec2, testSql, - ETranslationMode::SQL); + res.Has("pt") ? ETranslationMode::PG : ETranslationMode::SQL); auto input2 = TStringStream(output1); auto handle2 = testProgram->Apply(&input2); TStringStream output2; |