diff options
author | vvvv <[email protected]> | 2025-07-28 11:46:56 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2025-07-28 12:46:38 +0300 |
commit | 82e921685d24483f8a5b09c95f36339fe90b78dd (patch) | |
tree | 6b1e736504d06e9ccf1bf7c07fe215fd3f313b41 | |
parent | 59ca28d036252bccdbe709379bfc08851a130b24 (diff) |
YQL-20216 position aware csee nodes
commit_hash:78445e1b3b0bb001e0d08b36fd4d31bcd9e37eb4
-rw-r--r-- | yql/essentials/ast/yql_expr.h | 10 | ||||
-rw-r--r-- | yql/essentials/ast/yql_pos_handle.h | 2 | ||||
-rw-r--r-- | yql/essentials/core/type_ann/type_ann_core.cpp | 24 | ||||
-rw-r--r-- | yql/essentials/core/yql_expr_csee.cpp | 11 | ||||
-rw-r--r-- | yql/essentials/core/yql_type_annotation.h | 1 | ||||
-rw-r--r-- | yql/essentials/data/language/pragmas_opensource.json | 6 | ||||
-rw-r--r-- | yql/essentials/providers/config/yql_config_provider.cpp | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/context.cpp | 3 | ||||
-rw-r--r-- | yql/essentials/sql/v1/context.h | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/query.cpp | 5 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_query.cpp | 1 | ||||
-rw-r--r-- | yql/essentials/tests/sql/sql2yql/canondata/result.json | 12 | ||||
-rw-r--r-- | yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_csee-pos_aware_/formatted.sql | 10 | ||||
-rw-r--r-- | yql/essentials/tests/sql/suites/csee/pos_aware.cfg | 3 | ||||
-rw-r--r-- | yql/essentials/tests/sql/suites/csee/pos_aware.sql | 8 |
15 files changed, 100 insertions, 5 deletions
diff --git a/yql/essentials/ast/yql_expr.h b/yql/essentials/ast/yql_expr.h index a75667766b8..8da6bbf3a1f 100644 --- a/yql/essentials/ast/yql_expr.h +++ b/yql/essentials/ast/yql_expr.h @@ -2208,6 +2208,14 @@ public: return bool(UnordChildren_); } + void SetPosAware() { + PosAware_ = 1; + } + + bool IsPosAware() const { + return PosAware_; + } + ~TExprNode() { Y_ABORT_UNLESS(Dead(), "Node (id: %lu, type: %s, content: '%s') not dead on destruction.", UniqueId_, ToString(Type_).data(), TString(ContentUnchecked()).data()); @@ -2242,6 +2250,7 @@ private: , UnordChildren_(0) , ShallBeDisclosed_(0) , LiteralList_(0) + , PosAware_(0) {} TExprNode(const TExprNode&) = delete; @@ -2311,6 +2320,7 @@ private: ui8 UnordChildren_ : 1; // NOLINT(readability-identifier-naming) ui8 ShallBeDisclosed_ : 1; // NOLINT(readability-identifier-naming) ui8 LiteralList_ : 1; // NOLINT(readability-identifier-naming) + ui8 PosAware_ : 1; // NOLINT(readability-identifier-naming) }; }; diff --git a/yql/essentials/ast/yql_pos_handle.h b/yql/essentials/ast/yql_pos_handle.h index eecf9173868..ab79445e3f6 100644 --- a/yql/essentials/ast/yql_pos_handle.h +++ b/yql/essentials/ast/yql_pos_handle.h @@ -6,6 +6,8 @@ namespace NYql { struct TPositionHandle { friend struct TExprContext; + bool operator==(TPositionHandle const& other) const = default; + bool operator!=(TPositionHandle const& other) const = default; private: ui32 Handle_ = 0; // 0 is guaranteed to represent default-constructed TPosition }; diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index b9ea8992130..f1f0a5f24f6 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -4999,7 +4999,7 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Ok; } - IGraphTransformer::TStatus UnwrapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + IGraphTransformer::TStatus UnwrapWrapper(const 
TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -5037,6 +5037,9 @@ namespace NTypeAnnImpl { } input->SetTypeAnn(input->Head().GetTypeAnn()->Cast<TOptionalExprType>()->GetItemType()); + if (ctx.Types.DebugPositions) { + input->SetPosAware(); + } return IGraphTransformer::TStatus::Ok; } @@ -7047,7 +7050,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Repeat; } - IGraphTransformer::TStatus EnsureWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + IGraphTransformer::TStatus EnsureWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { Y_UNUSED(output); if (!EnsureMinMaxArgsCount(*input, 2, 3, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -7073,6 +7076,9 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } input->SetTypeAnn(input->Head().GetTypeAnn()); + if (ctx.Types.DebugPositions) { + input->SetPosAware(); + } return IGraphTransformer::TStatus::Ok; } @@ -7657,6 +7663,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } // (7) settings + bool isStrict = false; TExprNode::TPtr settings; if (input->ChildrenSize() > 7) { settings = input->ChildPtr(7); @@ -7678,6 +7685,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> if (!EnsureTupleSize(*child, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } + isStrict = true; } else if (settingName == "blocks") { if (!EnsureTupleSize(*child, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -7909,6 +7917,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } input->SetTypeAnn(cachedType); + if (ctx.Types.DebugPositions && !isStrict) { + input->SetPosAware(); + } + return IGraphTransformer::TStatus::Ok; } @@ -8066,6 +8078,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } input->SetTypeAnn(callableType); + if (ctx.Types.DebugPositions) { + input->SetPosAware(); + } + return 
IGraphTransformer::TStatus::Ok; } @@ -12771,7 +12787,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["CoalesceMembers"] = &CoalesceMembersWrapper; Functions["Nvl"] = &NvlWrapper; Functions["Nanvl"] = &NanvlWrapper; - Functions["Unwrap"] = &UnwrapWrapper; + ExtFunctions["Unwrap"] = &UnwrapWrapper; Functions["Exists"] = &ExistsWrapper; Functions["BlockExists"] = &BlockExistsWrapper; Functions["BlockValidUnwrap"] = &BlockValidUnwrapWrapper; @@ -12862,7 +12878,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["PersistableRepr"] = &PersistableReprWrapper; Functions["EnsureConvertibleTo"] = &TypeAssertWrapper<false>; Functions["EnsureTupleSize"] = &TupleSizeAssertWrapper; - Functions["Ensure"] = &EnsureWrapper; + ExtFunctions["Ensure"] = &EnsureWrapper; Functions["RaiseError"] = &RaiseErrorWrapper; Functions["EnsureTypeKind"] = &EnsureTypeKindWrapper; Functions["TryMember"] = &TryMemberWrapper; diff --git a/yql/essentials/core/yql_expr_csee.cpp b/yql/essentials/core/yql_expr_csee.cpp index c796be64095..80f1921969c 100644 --- a/yql/essentials/core/yql_expr_csee.cpp +++ b/yql/essentials/core/yql_expr_csee.cpp @@ -121,6 +121,10 @@ namespace { break; } case TExprNode::Callable: + if (node.IsPosAware()) { + auto pos = node.Pos(); + hash = CseeHash(&pos, sizeof(pos), hash); + } if constexpr (UseDeterminsticHash) { hash = CseeHash(node.Content().data(), node.Content().size(), hash); } else { @@ -266,6 +270,13 @@ namespace { return left.Content().data() == right.Content().data() && left.GetFlagsToCompare() == right.GetFlagsToCompare(); case TExprNode::Callable: + if (left.IsPosAware() != right.IsPosAware()) { + return false; + } + + if (left.IsPosAware() && left.Pos() != right.Pos()) { + return false; + } // compare pointers due to intern if (left.Content().data() != right.Content().data()) { return false; diff --git a/yql/essentials/core/yql_type_annotation.h b/yql/essentials/core/yql_type_annotation.h index 79766beee7e..aba9660ea3c 100644 --- 
a/yql/essentials/core/yql_type_annotation.h +++ b/yql/essentials/core/yql_type_annotation.h @@ -388,6 +388,7 @@ struct TTypeAnnotationContext: public TThrRefBase { TLangVersion LangVer = MinLangVersion; EBackportCompatibleFeaturesMode BackportMode = EBackportCompatibleFeaturesMode::None; bool UseTypeDiffForConvertToError = false; + bool DebugPositions = false; THashMap<TString, TIntrusivePtr<TOptimizerStatistics::TColumnStatMap>> ColumnStatisticsByTableName; THashMap<ui64, std::shared_ptr<TOptimizerStatistics>> StatisticsMap; TIntrusivePtr<ITimeProvider> TimeProvider; diff --git a/yql/essentials/data/language/pragmas_opensource.json b/yql/essentials/data/language/pragmas_opensource.json index 272f251f0bf..9b1e6cb682e 100644 --- a/yql/essentials/data/language/pragmas_opensource.json +++ b/yql/essentials/data/language/pragmas_opensource.json @@ -879,6 +879,9 @@ "name": "DataWatermarks" }, { + "name": "DebugPositions" + }, + { "name": "DeriveColumnOrder" }, { @@ -921,6 +924,9 @@ "name": "DisableCompactNamedExprs" }, { + "name": "DisableDebugPositions" + }, + { "name": "DisableDeriveColumnOrder" }, { diff --git a/yql/essentials/providers/config/yql_config_provider.cpp b/yql/essentials/providers/config/yql_config_provider.cpp index eb519c8842d..fa770da3740 100644 --- a/yql/essentials/providers/config/yql_config_provider.cpp +++ b/yql/essentials/providers/config/yql_config_provider.cpp @@ -883,6 +883,14 @@ namespace { Types_.UseBlocks = (name == "UseBlocks"); } + else if (name == "DebugPositions" || name == "DisableDebugPositions") { + if (args.size() != 0) { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); + return false; + } + + Types_.DebugPositions = (name == "DebugPositions"); + } else if (name == "PgEmitAggApply" || name == "DisablePgEmitAggApply") { if (args.size() != 0) { ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); diff --git a/yql/essentials/sql/v1/context.cpp 
b/yql/essentials/sql/v1/context.cpp index f0cca2687dd..b95a559cac5 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -72,7 +72,8 @@ THashMap<TStringBuf, TPragmaField> CTX_PRAGMA_FIELDS = { {"DistinctOverKeys", &TContext::DistinctOverKeys}, {"GroupByExprAfterWhere", &TContext::GroupByExprAfterWhere}, {"FailOnGroupByExprOverride", &TContext::FailOnGroupByExprOverride}, - {"OptimizeSimpleILIKE", &TContext::OptimizeSimpleIlike} + {"OptimizeSimpleILIKE", &TContext::OptimizeSimpleIlike}, + {"DebugPositions", &TContext::DebugPositions}, }; typedef TMaybe<bool> TContext::*TPragmaMaybeField; diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h index a4027f8668d..59ef9353915 100644 --- a/yql/essentials/sql/v1/context.h +++ b/yql/essentials/sql/v1/context.h @@ -379,6 +379,7 @@ namespace NSQLTranslationV1 { bool EmitUnionMerge = false; bool OptimizeSimpleIlike = false; bool PersistableFlattenAndAggrExprs = false; + bool DebugPositions = false; TVector<size_t> ForAllStatementsParts; TMaybe<TString> Engine; diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp index 88966a24787..ffbfd669ac9 100644 --- a/yql/essentials/sql/v1/query.cpp +++ b/yql/essentials/sql/v1/query.cpp @@ -3311,6 +3311,11 @@ public: Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos_, "Engine"), BuildQuotedAtom(Pos_, *ctx.Engine)))); } + + if (ctx.DebugPositions) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos_, "DebugPositions")))); + } } } diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp index d556698e440..fffe9da92a8 100644 --- a/yql/essentials/sql/v1/sql_query.cpp +++ b/yql/essentials/sql/v1/sql_query.cpp @@ -3218,6 +3218,7 @@ THashMap<TString, TPragmaDescr> PragmaDescrs{ TABLE_ELEM("AutoCommit", PragmaAutoCommit, true), TABLE_ELEM("UseTablePrefixForEach", PragmaUseTablePrefixForEach, true), 
PAIRED_TABLE_ELEM("SimpleColumns", SimpleColumns), + PAIRED_TABLE_ELEM("DebugPositions", DebugPositions), PAIRED_TABLE_ELEM("CoalesceJoinKeysOnQualifiedAll", CoalesceJoinKeysOnQualifiedAll), PAIRED_TABLE_ELEM("PullUpFlatMapOverJoin", PragmaPullUpFlatMapOverJoin), PAIRED_TABLE_ELEM("FilterPushdownOverJoinOptionalSide", FilterPushdownOverJoinOptionalSide), diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 08f0c46a088..abcb11244bd 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -2211,6 +2211,13 @@ "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_csee-nested_closure_in_l2_and_l1_unordered_/sql.yql" } ], + "test_sql2yql.test[csee-pos_aware]": [ + { + "checksum": "382518a724fae4a031f1580ea5f64903", + "size": 2196, + "uri": "https://{canondata_backend}/1784117/d39d0599e286e4cc386ded7e3a8c8bc9d4d2b8c2/resource.tar.gz#test_sql2yql.test_csee-pos_aware_/sql.yql" + } + ], "test_sql2yql.test[csee-same_closure_l2]": [ { "checksum": "04afadbb9c75b52f6e356934325a12f4", @@ -9482,6 +9489,11 @@ "uri": "file://test_sql_format.test_csee-nested_closure_in_l2_and_l1_unordered_/formatted.sql" } ], + "test_sql_format.test[csee-pos_aware]": [ + { + "uri": "file://test_sql_format.test_csee-pos_aware_/formatted.sql" + } + ], "test_sql_format.test[csee-same_closure_l2]": [ { "uri": "file://test_sql_format.test_csee-same_closure_l2_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_csee-pos_aware_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_csee-pos_aware_/formatted.sql new file mode 100644 index 00000000000..d9e2dda61a4 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_csee-pos_aware_/formatted.sql @@ -0,0 +1,10 @@ +/* custom error: <main>:5:13: Error: 
Cannot parse string value from entity (#) */ +PRAGMA DebugPositions; + +SELECT + Yson::ConvertToString(d['answer']), + Yson::ConvertToString(d['query']) +FROM ( + SELECT + "{answer=foo;query=#}"y AS d +); diff --git a/yql/essentials/tests/sql/suites/csee/pos_aware.cfg b/yql/essentials/tests/sql/suites/csee/pos_aware.cfg new file mode 100644 index 00000000000..61364a1d2ce --- /dev/null +++ b/yql/essentials/tests/sql/suites/csee/pos_aware.cfg @@ -0,0 +1,3 @@ +xfail +udf yson2_udf + diff --git a/yql/essentials/tests/sql/suites/csee/pos_aware.sql b/yql/essentials/tests/sql/suites/csee/pos_aware.sql new file mode 100644 index 00000000000..8a3ff221394 --- /dev/null +++ b/yql/essentials/tests/sql/suites/csee/pos_aware.sql @@ -0,0 +1,8 @@ +/* custom error: <main>:5:13: Error: Cannot parse string value from entity (#) */ +pragma DebugPositions; +select + Yson::ConvertToString(d["answer"]), + Yson::ConvertToString(d["query"]) +from ( + select "{answer=foo;query=#}"y as d +) |