diff options
author | lucius <lucius@yandex-team.com> | 2025-04-22 15:07:35 +0300 |
---|---|---|
committer | lucius <lucius@yandex-team.com> | 2025-04-22 15:31:53 +0300 |
commit | d44f3d4d8ab58e6b8502d63babf0570f8d77af29 (patch) | |
tree | 53bb5a6ac4eb627b79390c1944fc5a0654b56dc5 | |
parent | 5bd08bac300a4c0e79e4e99dd68ca9fd8e45a2d4 (diff) | |
download | ydb-d44f3d4d8ab58e6b8502d63babf0570f8d77af29.tar.gz |
YQL-19715 optional int support
commit_hash:31b59370a1423bb42b78161908b85ef57b800e5c
9 files changed, 192 insertions, 45 deletions
diff --git a/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp b/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp index 460b607e01a..5467508a59a 100644 --- a/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp +++ b/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp @@ -64,6 +64,7 @@ #include <util/stream/str.h> #include <util/stream/input.h> #include <util/stream/file.h> +#include <util/string/type.h> #include <util/system/execpath.h> #include <util/system/guard.h> #include <util/system/shellcommand.h> @@ -214,46 +215,50 @@ TString DebugPath(NYT::TRichYPath path) { return NYT::NodeToCanonicalYsonString(NYT::PathToNode(path), NYT::NYson::EYsonFormat::Text) + " (" + std::to_string(numColumns) + " columns)"; } -void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs) { - EDataSlot toType = column->GetTypeAnn()->Cast<TDataExprType>()->GetSlot(); +void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs, bool& isOptional) { + const TDataExprType* dataType = nullptr; + const bool columnHasDataType = IsDataOrOptionalOfData(column->GetTypeAnn(), isOptional, dataType); + YQL_ENSURE(columnHasDataType, "YtQLFilter: unsupported type of column " << column->Dump()); + YQL_ENSURE(dataType); + const EDataSlot dataSlot = dataType->Cast<TDataExprType>()->GetSlot(); - // AllowIntegralConversion (may consider some refactoring) - if (toType == EDataSlot::Uint8) { + // looks like AllowIntegralConversion (may consider some refactoring) + if (dataSlot == EDataSlot::Uint8) { isSigned = false; minValueAbs = 0; maxValueAbs = Max<ui8>(); } - else if (toType == EDataSlot::Uint16) { + else if (dataSlot == EDataSlot::Uint16) { isSigned = false; minValueAbs = 0; maxValueAbs = Max<ui16>(); } - else if (toType == EDataSlot::Uint32) { + else if (dataSlot == EDataSlot::Uint32) { isSigned = false; minValueAbs = 0; maxValueAbs = Max<ui32>(); } - else if (toType == EDataSlot::Uint64) { + else if (dataSlot == EDataSlot::Uint64) { isSigned = false; minValueAbs = 0; maxValueAbs = Max<ui64>(); } - else if (toType == EDataSlot::Int8) { + else if (dataSlot == EDataSlot::Int8) { isSigned = true; minValueAbs = (ui64)Max<i8>() + 1; maxValueAbs = (ui64)Max<i8>(); } - else if (toType == EDataSlot::Int16) { + else if (dataSlot == EDataSlot::Int16) { isSigned = true; minValueAbs = (ui64)Max<i16>() + 1; maxValueAbs = (ui64)Max<i16>(); } - else if (toType == EDataSlot::Int32) { + else if (dataSlot == EDataSlot::Int32) { isSigned = true; minValueAbs = (ui64)Max<i32>() + 1; maxValueAbs = (ui64)Max<i32>(); } - else if (toType == EDataSlot::Int64) { + else if (dataSlot == EDataSlot::Int64) { isSigned = true; minValueAbs = (ui64)Max<i64>() + 1; maxValueAbs = (ui64)Max<i64>(); @@ -286,51 +291,102 @@ void ConvertComparisonForQL(const TStringBuf& opName, TStringBuilder& result) { } } -void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, TStringBuilder& result) { +void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, const std::optional<bool>& nullValue, TStringBuilder& result) { + if (TMaybeNode<TCoNull>(intValue) || TMaybeNode<TCoNothing>(intValue)) { + YQL_ENSURE(nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported"); + if (nullValue.value()) { + result << "TRUE"; + } else { + result << "FALSE"; + } + return; + } + + TMaybeNode<TCoIntegralCtor> maybeIntValue; + if (auto maybeJustValue = TMaybeNode<TCoJust>(intValue)) { + maybeIntValue = TMaybeNode<TCoIntegralCtor>(maybeJustValue.Cast().Input().Ptr()); + } else { + maybeIntValue = TMaybeNode<TCoIntegralCtor>(intValue); + } + YQL_ENSURE(maybeIntValue); + bool columnsIsSigned; ui64 minValueAbs; ui64 maxValueAbs; - GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs); + bool columnIsOptional; + GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs, columnIsOptional); + YQL_ENSURE(!columnIsOptional || columnIsOptional && nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported"); - const auto maybeInt = TMaybeNode<TCoIntegralCtor>(intValue); - YQL_ENSURE(maybeInt); bool hasSign; bool isSigned; ui64 valueAbs; - ExtractIntegralValue(maybeInt.Ref(), false, hasSign, isSigned, valueAbs); + ExtractIntegralValue(maybeIntValue.Ref(), false, hasSign, isSigned, valueAbs); + std::optional<bool> constantFilter; if (!hasSign && valueAbs > maxValueAbs) { - // value is greater than maximum + // Value is greater than maximum. if (opName == ">" || opName == ">=" || opName == "==") { - result << "FALSE"; + constantFilter = false; } else { - result << "TRUE"; + constantFilter = true; } } else if (hasSign && valueAbs > minValueAbs) { - // value is less than minimum + // Value is less than minimum. if (opName == "<" || opName == "<=" || opName == "==") { - result << "FALSE"; + constantFilter = false; } else { - result << "TRUE"; + constantFilter = true; + } + } + + const auto columnName = intColumn->ChildPtr(1)->Content(); + if (!constantFilter.has_value()) { + // Value is in the range, comparison is not constant. + if (columnIsOptional) { + const bool isLess = opName == "<" || opName == "<="; + if (isLess && !nullValue.value()) { + // QL will handle 'x [operation] NULL' as TRUE here, but we need FALSE. + QuoteColumnForQL(columnName, result); + result << " != NULL AND "; + } else if (!isLess && nullValue.value()) { + // QL will handle 'x [operation] NULL' as FALSE here, but we need TRUE. + QuoteColumnForQL(columnName, result); + result << " = NULL OR "; + } } - } else { - // value is in the range - const auto columnName = intColumn->ChildPtr(1)->Content(); - const auto valueStr = maybeInt.Cast().Literal().Value(); QuoteColumnForQL(columnName, result); result << " "; ConvertComparisonForQL(opName, result); + const auto valueStr = maybeIntValue.Cast().Literal().Value(); result << " " << valueStr; + } else if (constantFilter.value()) { + // Value is out of the range, comparison is always TRUE. + if (columnIsOptional && !nullValue.value()) { + // Handle comparison with NULL as FALSE. + QuoteColumnForQL(columnName, result); + result << " IS NOT NULL"; + } else { + result << "TRUE"; + } + } else { + // Value is out of the range, comparison is always FALSE. + if (columnIsOptional && nullValue.value()) { + // Handle comparison with NULL as TRUE. + QuoteColumnForQL(columnName, result); + result << " IS NULL"; + } else { + result << "FALSE"; + } } } -void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result) { +void GenerateInputQueryComparison(const TCoCompare& op, const std::optional<bool>& nullValue, TStringBuilder& result) { YQL_ENSURE(op.Ref().IsCallable({"<", "<=", ">", ">=", "==", "!="})); const auto left = op.Left().Ptr(); const auto right = op.Right().Ptr(); if (left->IsCallable("Member")) { - GenerateInputQueryIntegerComparison(op.CallableName(), left, right, result); + GenerateInputQueryIntegerComparison(op.CallableName(), left, right, nullValue, result); } else { YQL_ENSURE(right->IsCallable("Member")); auto invertedOp = op.CallableName(); @@ -343,17 +399,29 @@ void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result) } else if (invertedOp == ">=") { invertedOp = "<="; } - GenerateInputQueryIntegerComparison(invertedOp, right, left, result); + GenerateInputQueryIntegerComparison(invertedOp, right, left, nullValue, result); } } void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuilder& result) { if (const auto maybeCompare = TMaybeNode<TCoCompare>(node)) { - GenerateInputQueryComparison(maybeCompare.Cast(), result); + GenerateInputQueryComparison(maybeCompare.Cast(), {}, result); } else if (node->IsCallable("Not")) { - result << "NOT ("; + const auto child = node->ChildPtr(0); + if (child->IsCallable("Exists")) { + // Do not generate NOT (x IS NOT NULL). + result << "("; + GenerateInputQueryWhereExpression(child->ChildPtr(0), result); + result << ") IS NULL"; + } else { + result << "NOT ("; + GenerateInputQueryWhereExpression(child, result); + result << ")"; + } + } else if (node->IsCallable("Exists")) { + result << "("; GenerateInputQueryWhereExpression(node->ChildPtr(0), result); - result << ")"; + result << ") IS NOT NULL"; } else if (node->IsCallable({"And", "Or"})) { const TStringBuf op = node->IsCallable("And") ? "AND" : "OR"; @@ -367,6 +435,17 @@ void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuild GenerateInputQueryWhereExpression(node->Child(i), result); result << ")"; }; + } else if (node->IsCallable("Coalesce")) { + YQL_ENSURE(node->ChildrenSize() == 2); + const auto op = TMaybeNode<TCoCompare>(node->Child(0)).Cast(); + const auto nullValueStr = TMaybeNode<TCoBool>(node->Child(1)).Cast().Literal().Value(); + const std::optional<bool> nullValue(IsTrue(nullValueStr)); + GenerateInputQueryComparison(op, nullValue, result); + } else if (const auto maybeBool = TMaybeNode<TCoBool>(node)) { + result << maybeBool.Cast().Literal().Value(); + } else if (node->IsCallable("Member")) { + const auto columnName = node->ChildPtr(1)->Content(); + QuoteColumnForQL(columnName, result); } else { YQL_ENSURE(false, "unexpected node type"); } diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp index bd24c6a473f..99a4bbde694 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp @@ -15,13 +15,13 @@ using namespace NPrivate; namespace { -bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) { +bool NodeHasQLCompatibleType(const TExprNode::TPtr& node, bool allowOptional) { bool isOptional = false; const TDataExprType* dataType = nullptr; if (!IsDataOrOptionalOfData(node->GetTypeAnn(), isOptional, dataType)) { return false; } - if (isOptional) { + if (!allowOptional && isOptional) { return false; } if (!dataType) { @@ -33,8 +33,8 @@ bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) { return true; } -TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg) { - if (!NodeHasQLCompatibleType(node)) { +TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, bool allowOptional) { + if (!NodeHasQLCompatibleType(node, allowOptional)) { return nullptr; } if (IsDepended(*node, *rowArg)) { @@ -43,7 +43,7 @@ TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& return node; } -TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) { +TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) { if (!node->IsCallable("Member")) { return nullptr; } @@ -55,22 +55,22 @@ TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TP if (memberName.StartsWith("_yql_sys_")) { return nullptr; } - if (!NodeHasQLCompatibleType(node)) { + if (!NodeHasQLCompatibleType(node, allowOptional)) { return nullptr; } auto arg = newRowArg; return ctx.ChangeChild(*node, 0, std::move(arg)); } -TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) { +TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) { YQL_ENSURE(node->ChildrenSize() == 2); TExprNode::TPtr childLeft; TExprNode::TPtr childRight; - if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx)) { - childRight = CheckQLConst(node->ChildPtr(1), rowArg); + if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx, allowOptional)) { + childRight = CheckQLConst(node->ChildPtr(1), rowArg, allowOptional); } - else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx)) { - childLeft = CheckQLConst(node->ChildPtr(0), rowArg); + else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx, allowOptional)) { + childLeft = CheckQLConst(node->ChildPtr(0), rowArg, allowOptional); } if (!childLeft || !childRight) { return nullptr; @@ -79,7 +79,7 @@ TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode } TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) { - if (node->IsCallable({"And", "Or", "Not"})) { + if (node->IsCallable({"And", "Or", "Not", "Exists"})) { TExprNode::TListType convertedChildren; for (const auto& child : node->ChildrenList()) { const auto converted = ConvertQLSubTree(child, rowArg, newRowArg, ctx); @@ -90,9 +90,33 @@ TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::T }; return ctx.ChangeChildren(*node, std::move(convertedChildren)); } + if (node->IsCallable("Coalesce")) { + if (node->ChildrenSize() != 2) { + return nullptr; + } + const auto comparison = node->Child(0); + if (!comparison->IsCallable({"<", "<=", ">", ">=", "==", "!="})) { + return nullptr; + } + const auto nullValue = node->Child(1); + if (!nullValue->IsCallable("Bool")) { + return nullptr; + } + const auto convertedComparison = ConvertQLComparison(comparison, rowArg, newRowArg, ctx, /*allowOptional*/ true); + if (!convertedComparison) { + return nullptr; + } + return ctx.ChangeChildren(*node, {convertedComparison, nullValue}); + } if (node->IsCallable({"<", "<=", ">", ">=", "==", "!="})) { return ConvertQLComparison(node, rowArg, newRowArg, ctx); } + if (node->IsCallable("Bool")) { + return node; + } + if (node->IsCallable("Member")) { + return ConvertQLMember(node, rowArg, newRowArg, ctx); + } return nullptr; } diff --git a/yt/yql/providers/yt/provider/yql_yt_helpers.cpp b/yt/yql/providers/yt/provider/yql_yt_helpers.cpp index 4ee06a21708..56f61e8a97d 100644 --- a/yt/yql/providers/yt/provider/yql_yt_helpers.cpp +++ b/yt/yql/providers/yt/provider/yql_yt_helpers.cpp @@ -714,7 +714,7 @@ void GetNodesToCalculateFromQLFilter(const TExprNode& qlFilter, TExprNode::TList YQL_ENSURE(qlFilter.IsCallable("YtQLFilter")); const auto lambdaBody = qlFilter.Child(1)->Child(1); VisitExpr(lambdaBody, [&needCalc, &uniqNodes](const TExprNode::TPtr& node) { - if (node->IsCallable({"And", "Or", "Not", "<", "<=", ">", ">=", "==", "!="})) { + if (node->IsCallable({"And", "Or", "Not", "Coalesce", "Exists", "<", "<=", ">", ">=", "==", "!="})) { return true; } if (node->IsCallable("Member")) { diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg b/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg new file mode 100644 index 00000000000..0978582140a --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg @@ -0,0 +1 @@ +in Input integer_optional.txt diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql b/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql new file mode 100644 index 00000000000..6faf68b3156 --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql @@ -0,0 +1,10 @@ +pragma yt.UseQLFilter; + +select * +from plato.Input +where + not (a <= 5) + and + b < 0 + and + c > Just(5);
\ No newline at end of file diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt new file mode 100644 index 00000000000..36a0341a3e9 --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt @@ -0,0 +1,10 @@ +{"a"=1;"b"=1;"c"=1u;"d"=1;"e"=1u}; +{"a"=#;"b"=-2;"c"=2u;"d"=10;"e"=10u}; +{"a"=-3;"b"=3;"c"=3u;"d"=-100;"e"=100u}; +{"a"=4;"b"=#;"c"=4u;"d"=1000;"e"=1000u}; +{"a"=5;"b"=-5;"c"=#;"d"=10000;"e"=10000u}; +{"a"=-6;"b"=6;"c"=6u;"d"=-100000;"e"=100000u}; +{"a"=#;"b"=7;"c"=7u;"d"=1000000;"e"=1000000u}; +{"a"=8;"b"=-8;"c"=8u;"d"=#;"e"=10000000u}; +{"a"=-9;"b"=9;"c"=#;"d"=-100000000;"e"=100000000u}; +{"a"=10;"b"=#;"c"=10u;"d"=1000000000;"e"=#}; diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr new file mode 100644 index 00000000000..b4567c7b245 --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr @@ -0,0 +1,11 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["a";["OptionalType";["DataType";"Int32"]]]; + ["b";["OptionalType";["DataType";"Int32"]]]; + ["c";["OptionalType";["DataType";"Uint32"]]]; + ["d";["OptionalType";["DataType";"Int64"]]]; + ["e";["OptionalType";["DataType";"Uint64"]]] + ]] + } +} diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg new file mode 100644 index 00000000000..0978582140a --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg @@ -0,0 +1 @@ +in Input integer_optional.txt diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql new file mode 100644 index 00000000000..4ad8a40b212 --- /dev/null +++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql @@ -0,0 +1,11 @@ +pragma yt.UseQLFilter; + +select * +from plato.Input +where + ( + a > null + or b > nothing(Int32?) + or c is null + ) + and (c > 0) is null;
\ No newline at end of file |