summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/select_yql.cpp
diff options
context:
space:
mode:
authorvitya-smirnov <[email protected]>2025-10-14 09:30:10 +0300
committervitya-smirnov <[email protected]>2025-10-14 09:48:40 +0300
commite06bac75417cb26b5603d104e58a15aeda178d5e (patch)
tree5a3cc5430cd0bc4fff262e5d37f41291914d6e70 /yql/essentials/sql/v1/select_yql.cpp
parent6afc58f5f8644b9e12b407cfa1341dacc1aaf5e2 (diff)
YQL-20436: Translate `SELECT .. FROM VALUES ..` to `YqlSelect`
The current `SQLv1` translation produces relatively low-level `YQLs` constructions that are hard to match during later optimizations, for example, subquery unnesting. It also assumes that expressions depend only on the corresponding source row, which is not true for correlated subqueries. Both limitations block the implementation of correlated subqueries. Although the problem exists for `SQLv1`, it is already solved for the `PG` syntax. There, the PostgreSQL-produced AST is converted to a special `YQLs` "bulk select" node called `PgSelect`. It is more declarative and is expanded later than translation. This fact helped to support correlated subqueries, which with `PgSelect` are type-checkable and decorrelatable (not generally, but heuristically). This patch is the first step towards "bulk select" translation for `SQLv1`. As there is a lot of code already written for `PgSelect`, and `PG` and `SQLv1` are relatively similar, I decided to rebrand `PgSelect` into a more general `SqlSelect` node. It seems that for some near-future goals `PgSelect` should be enough. Two problems were solved: 1. `PgSelect` comes with `OrderedColumns` by default and its implementation is tightly coupled with it. 2. `PgSelect` does Pg type casts. This patch contains the following changes: - Added pragma `YqlSelect = 'disable' | 'auto' | 'force'` - Added `YqlSelect` (`PgSelect` alias) translation - Changed `PgSelect` wrappers to support `YqlSelect` - Changed `PgSelect` expanders to support `YqlSelect` commit_hash:8a55d63e06c22592b2029dd260bbd259194e92dc
Diffstat (limited to 'yql/essentials/sql/v1/select_yql.cpp')
-rw-r--r--yql/essentials/sql/v1/select_yql.cpp254
1 files changed, 254 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/select_yql.cpp b/yql/essentials/sql/v1/select_yql.cpp
new file mode 100644
index 00000000000..3a7d091b7d3
--- /dev/null
+++ b/yql/essentials/sql/v1/select_yql.cpp
@@ -0,0 +1,254 @@
+#include "select_yql.h"
+
+#include "context.h"
+
+namespace NSQLTranslationV1 {
+
+// Calls `Init(ctx, src)` on each node of `nodes`, in order.
+// Stops at the first node whose initialization fails and returns false;
+// returns true when every node was initialized (including the empty case).
+bool Init(TContext& ctx, ISource* src, const TVector<TNodePtr>& nodes) {
+    for (size_t index = 0; index < nodes.size(); ++index) {
+        const bool initialized = nodes[index]->Init(ctx, src);
+        if (!initialized) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// AST node for a `VALUES` statement translated into a `YqlSelect` callable
+// with a single `YqlSetItem` carrying a `values` option.
+//
+// `Rows` (inherited from TYqlValuesArgs) holds the cell expressions, one
+// inner vector per row. Column names default to `column<i>` unless they
+// are overridden through SetColumns().
+class TYqlValuesNode final: public INode, private TYqlValuesArgs {
+public:
+    TYqlValuesNode(TPosition position, TYqlValuesArgs&& args)
+        : INode(std::move(position))
+        , TYqlValuesArgs(std::move(args))
+    {
+    }
+
+    // Initializes every cell expression, checks that all rows have the same
+    // length and prepares the `YqlValuesList` node used by Translate().
+    // Returns false on the first failure; row length mismatches are reported
+    // through `ctx.Error()`.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        for (const auto& row : Rows) {
+            if (!::NSQLTranslationV1::Init(ctx, src, row)) {
+                return false;
+            }
+        }
+
+        const TMaybe<size_t> width = Width(Rows, ctx);
+        if (!width) {
+            return false;
+        }
+        Width_ = *width;
+
+        Values_ = BuildValueList(Rows);
+        if (!Values_) {
+            return false;
+        }
+
+        return true;
+    }
+
+    // Emits `(YqlSelect '('('set_items '((YqlSetItem '('('values '(<columns>)
+    // <values>))))) '('set_ops '('push))))`.
+    // Uses `Values_` and `Width_`, which are filled in by DoInit().
+    TAstNode* Translate(TContext& ctx) const override {
+        TNodePtr node =
+            Y("YqlSelect",
+              Q(Y(Q(Y(Q("set_items"),
+                      Q(Y(Y("YqlSetItem",
+                            Q(Y(Q(Y(Q("values"),
+                                    Q(BuildColumnList()),
+                                    Values_))))))))),
+                  Q(Y(Q("set_ops"), Q(Y(Q("push"))))))));
+        return node->Translate(ctx);
+    }
+
+    TNodePtr DoClone() const override {
+        return new TYqlValuesNode(*this);
+    }
+
+    // Overrides the default `column<i>` names with `columns`.
+    // An empty list is accepted and leaves the current names untouched.
+    // A non-empty list must have exactly `Width_` entries, otherwise an
+    // error is reported through `ctx.Error()` and false is returned.
+    // NOTE: `Width_` is computed in DoInit(), so this is expected to be
+    // called after the node has been initialized.
+    bool SetColumns(TVector<TString> columns, TContext& ctx) {
+        if (columns.empty()) {
+            return true;
+        }
+
+        if (columns.size() != Width_) {
+            ctx.Error() << "VALUES statement width is " << Width_
+                        << ", but got " << columns.size()
+                        << " column aliases";
+            return false;
+        }
+
+        Columns_ = std::move(columns);
+        return true;
+    }
+
+private:
+    // Builds the list of quoted column names: the names given to
+    // SetColumns() when present, `column0`, `column1`, ... otherwise.
+    TNodePtr BuildColumnList() const {
+        TNodePtr columns = Y();
+        for (size_t i = 0; i < Width_; ++i) {
+            TString name;
+            if (!Columns_) {
+                name = TStringBuilder() << "column" << i;
+            } else {
+                name = Columns_->at(i);
+            }
+
+            columns->Add(Q(std::move(name)));
+        }
+        return columns;
+    }
+
+    // Builds `(YqlValuesList '(<row 0 cells>) '(<row 1 cells>) ...)`.
+    TNodePtr BuildValueList(const TVector<TVector<TNodePtr>>& rows) const {
+        TNodePtr values = Y("YqlValuesList");
+        for (const auto& row : rows) {
+            TNodePtr value = Y();
+            for (const TNodePtr& column : row) {
+                value->Add(column);
+            }
+
+            values->Add(Q(std::move(value)));
+        }
+        return values;
+    }
+
+    // Returns the common length of all `rows`, or Nothing() (after
+    // reporting an error through `ctx.Error()`) when the lengths differ.
+    //
+    // The width is taken from the first row instead of a
+    // `numeric_limits<size_t>::max()` "unset" sentinel: with the sentinel an
+    // empty `rows` leaked SIZE_MAX out as the width, which BuildColumnList()
+    // would then iterate over. No rows now yields a width of 0.
+    TMaybe<size_t> Width(const TVector<TVector<TNodePtr>>& rows, TContext& ctx) const {
+        if (rows.empty()) {
+            return size_t{0};
+        }
+
+        const size_t width = rows.front().size();
+        for (const auto& row : rows) {
+            if (row.size() != width) {
+                ctx.Error() << "VALUES lists must all be the same length. "
+                            << "Expected width is " << width << ", "
+                            << "but got " << row.size();
+                return Nothing();
+            }
+        }
+        return width;
+    }
+
+    // `YqlValuesList` node built by DoInit().
+    TNodePtr Values_;
+    // Number of cells per row, computed by DoInit().
+    size_t Width_ = 0;
+    // Explicit column names; empty TMaybe means "use the defaults".
+    TMaybe<TVector<TString>> Columns_;
+};
+
+// AST node for a `SELECT` statement translated into a `YqlSelect` callable.
+// The callable is wrapped into a block that writes its output to the
+// `result` data sink and commits it.
+//
+// `Terms` and `Source` are inherited from TYqlSelectArgs: the projection
+// expressions and the optional `FROM` source with its optional alias.
+class TYqlSelectNode final: public INode, private TYqlSelectArgs {
+public:
+    TYqlSelectNode(TPosition position, TYqlSelectArgs&& args)
+        : INode(std::move(position))
+        , TYqlSelectArgs(std::move(args))
+    {
+    }
+
+    // Initializes the terms and then the source (if any), assembles the
+    // `YqlSetItem` options and stores the final block in `Node_`.
+    // Returns false as soon as any step fails.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!InitTerms(ctx, src)) {
+            return false;
+        }
+        if (Source && !Source->Node->Init(ctx, src)) {
+            return false;
+        }
+
+        TNodePtr setItem = Y();
+
+        TNodePtr resultItems = BuildYqlResultItems(Terms);
+        if (!resultItems) {
+            return false;
+        }
+        setItem->Add(Q(Y(Q("result"), Q(std::move(resultItems)))));
+
+        if (Source && !AddSource(ctx, setItem)) {
+            return false;
+        }
+
+        Node_ = BuildWriteBlock(BuildSelect(std::move(setItem)));
+        return true;
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Node_->Translate(ctx);
+    }
+
+    TNodePtr DoClone() const override {
+        return new TYqlSelectNode(*this);
+    }
+
+private:
+    // Labels every term (see TermAlias) and then initializes all of them.
+    bool InitTerms(TContext& ctx, ISource* src) {
+        size_t index = 0;
+        for (const TNodePtr& term : Terms) {
+            term->SetLabel(TermAlias(term, index));
+            ++index;
+        }
+
+        return ::NSQLTranslationV1::Init(ctx, src, Terms);
+    }
+
+    // Picks the output name of the `i`-th term: its explicit label if it is
+    // not empty, else its column name if it has one, else `column<i>`.
+    TString TermAlias(const TNodePtr& term, size_t i) const {
+        const TString& explicitLabel = term->GetLabel();
+        if (!explicitLabel.empty()) {
+            return explicitLabel;
+        }
+
+        const TString* columnName = term->GetColumnName();
+        if (columnName) {
+            return *columnName;
+        }
+
+        return TStringBuilder() << "column" << i;
+    }
+
+    // Appends the `from` and `join_ops` options for `Source` to `setItem`.
+    // Column aliases are accepted only when the source is a VALUES node,
+    // to which they are forwarded via SetColumns(); otherwise an error is
+    // reported. Moves the source node, alias name and alias columns out of
+    // `Source`. Must be called only when `Source` is set.
+    bool AddSource(TContext& ctx, const TNodePtr& setItem) {
+        TNodePtr& sourceNode = Source->Node;
+
+        TString aliasName;
+        if (auto& alias = Source->Alias) {
+            aliasName = std::move(alias->Name);
+
+            auto& aliasColumns = alias->Columns;
+            if (aliasColumns) {
+                auto* values = dynamic_cast<TYqlValuesNode*>(sourceNode.Get());
+                if (!values) {
+                    ctx.Error() << "Qualified by column names source alias "
+                                << "is viable only for VALUES statement";
+                    return false;
+                }
+                if (!values->SetColumns(std::move(aliasColumns), ctx)) {
+                    return false;
+                }
+            }
+        }
+
+        // The trailing empty list is the column slot: names are delivered
+        // to the VALUES node through SetColumns() instead.
+        setItem->Add(Q(Y(Q("from"),
+                         Q(Y(Q(Y(
+                             std::move(sourceNode),
+                             Q(std::move(aliasName)),
+                             Q(Y()))))))));
+
+        setItem->Add(Q(Y(Q("join_ops"), Q(Y(Q(Y(Q(Y(Q("push"))))))))));
+        return true;
+    }
+
+    // Wraps the set item options into
+    // `(YqlSelect '('('set_items '((YqlSetItem '<item>))) '('set_ops '('push))))`.
+    TNodePtr BuildSelect(TNodePtr item) const {
+        return Y("YqlSelect",
+                 Q(Y(Q(Y(Q("set_items"),
+                         Q(Y(Y("YqlSetItem", Q(std::move(item))))))),
+                     Q(Y(Q("set_ops"), Q(Y(Q("push"))))))));
+    }
+
+    // Builds the block that binds `output`, writes it to the `result` data
+    // sink with the `type`, `autoref` and `unordered` settings, and returns
+    // the committed world.
+    TNodePtr BuildWriteBlock(TNodePtr output) const {
+        TNodePtr statements = Y();
+
+        statements->Add(Y("let", "output", std::move(output)));
+
+        statements->Add(Y("let", "result_sink",
+                          Y("DataSink", Q("result"))));
+
+        statements->Add(Y("let", "world",
+                          Y("Write!", "world", "result_sink", Y("Key"), "output",
+                            Q(Y(Q(Y(Q("type"))), Q(Y(Q("autoref"))), Q(Y(Q("unordered"))))))));
+
+        statements->Add(Y("return", Y("Commit!", "world", "result_sink")));
+
+        return Y("block", Q(std::move(statements)));
+    }
+
+    // Builds one `YqlResultItem` per term, named after the term's label.
+    TNodePtr BuildYqlResultItems(const TVector<TNodePtr>& terms) const {
+        TNodePtr items = Y();
+        for (size_t index = 0; index < terms.size(); ++index) {
+            const TNodePtr& term = terms[index];
+            items->Add(BuildYqlResultItem(term->GetLabel(), term));
+        }
+        return items;
+    }
+
+    // Builds `(YqlResultItem '<name> (Void) (lambda '() <term>))`.
+    TNodePtr BuildYqlResultItem(const TString& name, const TNodePtr& term) const {
+        TNodePtr lambda = Y("lambda", Q(Y()), term);
+        return Y("YqlResultItem", Q(name), Y("Void"), lambda);
+    }
+
+    // Final block produced by DoInit() and emitted by Translate().
+    TNodePtr Node_;
+};
+
+// Factory: creates the AST node for a `VALUES` statement located at
+// `position`, taking ownership of `args`.
+TNodePtr BuildYqlValues(TPosition position, TYqlValuesArgs&& args) {
+    TNodePtr valuesNode = new TYqlValuesNode(std::move(position), std::move(args));
+    return valuesNode;
+}
+
+// Factory: creates the AST node for a `SELECT` statement located at
+// `position`, taking ownership of `args`.
+TNodePtr BuildYqlSelect(TPosition position, TYqlSelectArgs&& args) {
+    TNodePtr selectNode = new TYqlSelectNode(std::move(position), std::move(args));
+    return selectNode;
+}
+
+} // namespace NSQLTranslationV1