aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author lucius <lucius@yandex-team.com> 2025-04-22 15:07:35 +0300
committer lucius <lucius@yandex-team.com> 2025-04-22 15:31:53 +0300
commit d44f3d4d8ab58e6b8502d63babf0570f8d77af29 (patch)
tree 53bb5a6ac4eb627b79390c1944fc5a0654b56dc5
parent 5bd08bac300a4c0e79e4e99dd68ca9fd8e45a2d4 (diff)
download ydb-d44f3d4d8ab58e6b8502d63babf0570f8d77af29.tar.gz
YQL-19715 optional int support
commit_hash:31b59370a1423bb42b78161908b85ef57b800e5c
-rw-r--r-- yt/yql/providers/yt/gateway/native/yql_yt_native.cpp 143
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp 48
-rw-r--r-- yt/yql/providers/yt/provider/yql_yt_helpers.cpp 2
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg 1
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional.sql 10
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional.txt 10
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr 11
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg 1
-rw-r--r-- yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql 11
9 files changed, 192 insertions, 45 deletions
diff --git a/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp b/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp
index 460b607e01a..5467508a59a 100644
--- a/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp
+++ b/yt/yql/providers/yt/gateway/native/yql_yt_native.cpp
@@ -64,6 +64,7 @@
#include <util/stream/str.h>
#include <util/stream/input.h>
#include <util/stream/file.h>
+#include <util/string/type.h>
#include <util/system/execpath.h>
#include <util/system/guard.h>
#include <util/system/shellcommand.h>
@@ -214,46 +215,50 @@ TString DebugPath(NYT::TRichYPath path) {
return NYT::NodeToCanonicalYsonString(NYT::PathToNode(path), NYT::NYson::EYsonFormat::Text) + " (" + std::to_string(numColumns) + " columns)";
}
-void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs) {
- EDataSlot toType = column->GetTypeAnn()->Cast<TDataExprType>()->GetSlot();
+void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs, bool& isOptional) {
+ const TDataExprType* dataType = nullptr;
+ const bool columnHasDataType = IsDataOrOptionalOfData(column->GetTypeAnn(), isOptional, dataType);
+ YQL_ENSURE(columnHasDataType, "YtQLFilter: unsupported type of column " << column->Dump());
+ YQL_ENSURE(dataType);
+ const EDataSlot dataSlot = dataType->Cast<TDataExprType>()->GetSlot();
- // AllowIntegralConversion (may consider some refactoring)
- if (toType == EDataSlot::Uint8) {
+ // looks like AllowIntegralConversion (may consider some refactoring)
+ if (dataSlot == EDataSlot::Uint8) {
isSigned = false;
minValueAbs = 0;
maxValueAbs = Max<ui8>();
}
- else if (toType == EDataSlot::Uint16) {
+ else if (dataSlot == EDataSlot::Uint16) {
isSigned = false;
minValueAbs = 0;
maxValueAbs = Max<ui16>();
}
- else if (toType == EDataSlot::Uint32) {
+ else if (dataSlot == EDataSlot::Uint32) {
isSigned = false;
minValueAbs = 0;
maxValueAbs = Max<ui32>();
}
- else if (toType == EDataSlot::Uint64) {
+ else if (dataSlot == EDataSlot::Uint64) {
isSigned = false;
minValueAbs = 0;
maxValueAbs = Max<ui64>();
}
- else if (toType == EDataSlot::Int8) {
+ else if (dataSlot == EDataSlot::Int8) {
isSigned = true;
minValueAbs = (ui64)Max<i8>() + 1;
maxValueAbs = (ui64)Max<i8>();
}
- else if (toType == EDataSlot::Int16) {
+ else if (dataSlot == EDataSlot::Int16) {
isSigned = true;
minValueAbs = (ui64)Max<i16>() + 1;
maxValueAbs = (ui64)Max<i16>();
}
- else if (toType == EDataSlot::Int32) {
+ else if (dataSlot == EDataSlot::Int32) {
isSigned = true;
minValueAbs = (ui64)Max<i32>() + 1;
maxValueAbs = (ui64)Max<i32>();
}
- else if (toType == EDataSlot::Int64) {
+ else if (dataSlot == EDataSlot::Int64) {
isSigned = true;
minValueAbs = (ui64)Max<i64>() + 1;
maxValueAbs = (ui64)Max<i64>();
@@ -286,51 +291,102 @@ void ConvertComparisonForQL(const TStringBuf& opName, TStringBuilder& result) {
}
}
-void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, TStringBuilder& result) {
+void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, const std::optional<bool>& nullValue, TStringBuilder& result) {
+ if (TMaybeNode<TCoNull>(intValue) || TMaybeNode<TCoNothing>(intValue)) {
+ YQL_ENSURE(nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported");
+ if (nullValue.value()) {
+ result << "TRUE";
+ } else {
+ result << "FALSE";
+ }
+ return;
+ }
+
+ TMaybeNode<TCoIntegralCtor> maybeIntValue;
+ if (auto maybeJustValue = TMaybeNode<TCoJust>(intValue)) {
+ maybeIntValue = TMaybeNode<TCoIntegralCtor>(maybeJustValue.Cast().Input().Ptr());
+ } else {
+ maybeIntValue = TMaybeNode<TCoIntegralCtor>(intValue);
+ }
+ YQL_ENSURE(maybeIntValue);
+
bool columnsIsSigned;
ui64 minValueAbs;
ui64 maxValueAbs;
- GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs);
+ bool columnIsOptional;
+ GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs, columnIsOptional);
+ YQL_ENSURE(!columnIsOptional || columnIsOptional && nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported");
- const auto maybeInt = TMaybeNode<TCoIntegralCtor>(intValue);
- YQL_ENSURE(maybeInt);
bool hasSign;
bool isSigned;
ui64 valueAbs;
- ExtractIntegralValue(maybeInt.Ref(), false, hasSign, isSigned, valueAbs);
+ ExtractIntegralValue(maybeIntValue.Ref(), false, hasSign, isSigned, valueAbs);
+ std::optional<bool> constantFilter;
if (!hasSign && valueAbs > maxValueAbs) {
- // value is greater than maximum
+ // Value is greater than maximum.
if (opName == ">" || opName == ">=" || opName == "==") {
- result << "FALSE";
+ constantFilter = false;
} else {
- result << "TRUE";
+ constantFilter = true;
}
} else if (hasSign && valueAbs > minValueAbs) {
- // value is less than minimum
+ // Value is less than minimum.
if (opName == "<" || opName == "<=" || opName == "==") {
- result << "FALSE";
+ constantFilter = false;
} else {
- result << "TRUE";
+ constantFilter = true;
+ }
+ }
+
+ const auto columnName = intColumn->ChildPtr(1)->Content();
+ if (!constantFilter.has_value()) {
+ // Value is in the range, comparison is not constant.
+ if (columnIsOptional) {
+ const bool isLess = opName == "<" || opName == "<=";
+ if (isLess && !nullValue.value()) {
+ // QL will handle 'x [operation] NULL' as TRUE here, but we need FALSE.
+ QuoteColumnForQL(columnName, result);
+ result << " != NULL AND ";
+ } else if (!isLess && nullValue.value()) {
+ // QL will handle 'x [operation] NULL' as FALSE here, but we need TRUE.
+ QuoteColumnForQL(columnName, result);
+ result << " = NULL OR ";
+ }
}
- } else {
- // value is in the range
- const auto columnName = intColumn->ChildPtr(1)->Content();
- const auto valueStr = maybeInt.Cast().Literal().Value();
QuoteColumnForQL(columnName, result);
result << " ";
ConvertComparisonForQL(opName, result);
+ const auto valueStr = maybeIntValue.Cast().Literal().Value();
result << " " << valueStr;
+ } else if (constantFilter.value()) {
+ // Value is out of the range, comparison is always TRUE.
+ if (columnIsOptional && !nullValue.value()) {
+ // Handle comparison with NULL as FALSE.
+ QuoteColumnForQL(columnName, result);
+ result << " IS NOT NULL";
+ } else {
+ result << "TRUE";
+ }
+ } else {
+ // Value is out of the range, comparison is always FALSE.
+ if (columnIsOptional && nullValue.value()) {
+ // Handle comparison with NULL as TRUE.
+ QuoteColumnForQL(columnName, result);
+ result << " IS NULL";
+ } else {
+ result << "FALSE";
+ }
}
}
-void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result) {
+void GenerateInputQueryComparison(const TCoCompare& op, const std::optional<bool>& nullValue, TStringBuilder& result) {
YQL_ENSURE(op.Ref().IsCallable({"<", "<=", ">", ">=", "==", "!="}));
const auto left = op.Left().Ptr();
const auto right = op.Right().Ptr();
if (left->IsCallable("Member")) {
- GenerateInputQueryIntegerComparison(op.CallableName(), left, right, result);
+ GenerateInputQueryIntegerComparison(op.CallableName(), left, right, nullValue, result);
} else {
YQL_ENSURE(right->IsCallable("Member"));
auto invertedOp = op.CallableName();
@@ -343,17 +399,29 @@ void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result)
} else if (invertedOp == ">=") {
invertedOp = "<=";
}
- GenerateInputQueryIntegerComparison(invertedOp, right, left, result);
+ GenerateInputQueryIntegerComparison(invertedOp, right, left, nullValue, result);
}
}
void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuilder& result) {
if (const auto maybeCompare = TMaybeNode<TCoCompare>(node)) {
- GenerateInputQueryComparison(maybeCompare.Cast(), result);
+ GenerateInputQueryComparison(maybeCompare.Cast(), {}, result);
} else if (node->IsCallable("Not")) {
- result << "NOT (";
+ const auto child = node->ChildPtr(0);
+ if (child->IsCallable("Exists")) {
+ // Do not generate NOT (x IS NOT NULL).
+ result << "(";
+ GenerateInputQueryWhereExpression(child->ChildPtr(0), result);
+ result << ") IS NULL";
+ } else {
+ result << "NOT (";
+ GenerateInputQueryWhereExpression(child, result);
+ result << ")";
+ }
+ } else if (node->IsCallable("Exists")) {
+ result << "(";
GenerateInputQueryWhereExpression(node->ChildPtr(0), result);
- result << ")";
+ result << ") IS NOT NULL";
} else if (node->IsCallable({"And", "Or"})) {
const TStringBuf op = node->IsCallable("And") ? "AND" : "OR";
@@ -367,6 +435,17 @@ void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuild
GenerateInputQueryWhereExpression(node->Child(i), result);
result << ")";
};
+ } else if (node->IsCallable("Coalesce")) {
+ YQL_ENSURE(node->ChildrenSize() == 2);
+ const auto op = TMaybeNode<TCoCompare>(node->Child(0)).Cast();
+ const auto nullValueStr = TMaybeNode<TCoBool>(node->Child(1)).Cast().Literal().Value();
+ const std::optional<bool> nullValue(IsTrue(nullValueStr));
+ GenerateInputQueryComparison(op, nullValue, result);
+ } else if (const auto maybeBool = TMaybeNode<TCoBool>(node)) {
+ result << maybeBool.Cast().Literal().Value();
+ } else if (node->IsCallable("Member")) {
+ const auto columnName = node->ChildPtr(1)->Content();
+ QuoteColumnForQL(columnName, result);
} else {
YQL_ENSURE(false, "unexpected node type");
}
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp
index bd24c6a473f..99a4bbde694 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp
@@ -15,13 +15,13 @@ using namespace NPrivate;
namespace {
-bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) {
+bool NodeHasQLCompatibleType(const TExprNode::TPtr& node, bool allowOptional) {
bool isOptional = false;
const TDataExprType* dataType = nullptr;
if (!IsDataOrOptionalOfData(node->GetTypeAnn(), isOptional, dataType)) {
return false;
}
- if (isOptional) {
+ if (!allowOptional && isOptional) {
return false;
}
if (!dataType) {
@@ -33,8 +33,8 @@ bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) {
return true;
}
-TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg) {
- if (!NodeHasQLCompatibleType(node)) {
+TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, bool allowOptional) {
+ if (!NodeHasQLCompatibleType(node, allowOptional)) {
return nullptr;
}
if (IsDepended(*node, *rowArg)) {
@@ -43,7 +43,7 @@ TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr&
return node;
}
-TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
+TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) {
if (!node->IsCallable("Member")) {
return nullptr;
}
@@ -55,22 +55,22 @@ TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TP
if (memberName.StartsWith("_yql_sys_")) {
return nullptr;
}
- if (!NodeHasQLCompatibleType(node)) {
+ if (!NodeHasQLCompatibleType(node, allowOptional)) {
return nullptr;
}
auto arg = newRowArg;
return ctx.ChangeChild(*node, 0, std::move(arg));
}
-TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
+TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) {
YQL_ENSURE(node->ChildrenSize() == 2);
TExprNode::TPtr childLeft;
TExprNode::TPtr childRight;
- if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx)) {
- childRight = CheckQLConst(node->ChildPtr(1), rowArg);
+ if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx, allowOptional)) {
+ childRight = CheckQLConst(node->ChildPtr(1), rowArg, allowOptional);
}
- else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx)) {
- childLeft = CheckQLConst(node->ChildPtr(0), rowArg);
+ else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx, allowOptional)) {
+ childLeft = CheckQLConst(node->ChildPtr(0), rowArg, allowOptional);
}
if (!childLeft || !childRight) {
return nullptr;
@@ -79,7 +79,7 @@ TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode
}
TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
- if (node->IsCallable({"And", "Or", "Not"})) {
+ if (node->IsCallable({"And", "Or", "Not", "Exists"})) {
TExprNode::TListType convertedChildren;
for (const auto& child : node->ChildrenList()) {
const auto converted = ConvertQLSubTree(child, rowArg, newRowArg, ctx);
@@ -90,9 +90,33 @@ TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::T
};
return ctx.ChangeChildren(*node, std::move(convertedChildren));
}
+ if (node->IsCallable("Coalesce")) {
+ if (node->ChildrenSize() != 2) {
+ return nullptr;
+ }
+ const auto comparison = node->Child(0);
+ if (!comparison->IsCallable({"<", "<=", ">", ">=", "==", "!="})) {
+ return nullptr;
+ }
+ const auto nullValue = node->Child(1);
+ if (!nullValue->IsCallable("Bool")) {
+ return nullptr;
+ }
+ const auto convertedComparison = ConvertQLComparison(comparison, rowArg, newRowArg, ctx, /*allowOptional*/ true);
+ if (!convertedComparison) {
+ return nullptr;
+ }
+ return ctx.ChangeChildren(*node, {convertedComparison, nullValue});
+ }
if (node->IsCallable({"<", "<=", ">", ">=", "==", "!="})) {
return ConvertQLComparison(node, rowArg, newRowArg, ctx);
}
+ if (node->IsCallable("Bool")) {
+ return node;
+ }
+ if (node->IsCallable("Member")) {
+ return ConvertQLMember(node, rowArg, newRowArg, ctx);
+ }
return nullptr;
}
diff --git a/yt/yql/providers/yt/provider/yql_yt_helpers.cpp b/yt/yql/providers/yt/provider/yql_yt_helpers.cpp
index 4ee06a21708..56f61e8a97d 100644
--- a/yt/yql/providers/yt/provider/yql_yt_helpers.cpp
+++ b/yt/yql/providers/yt/provider/yql_yt_helpers.cpp
@@ -714,7 +714,7 @@ void GetNodesToCalculateFromQLFilter(const TExprNode& qlFilter, TExprNode::TList
YQL_ENSURE(qlFilter.IsCallable("YtQLFilter"));
const auto lambdaBody = qlFilter.Child(1)->Child(1);
VisitExpr(lambdaBody, [&needCalc, &uniqNodes](const TExprNode::TPtr& node) {
- if (node->IsCallable({"And", "Or", "Not", "<", "<=", ">", ">=", "==", "!="})) {
+ if (node->IsCallable({"And", "Or", "Not", "Coalesce", "Exists", "<", "<=", ">", ">=", "==", "!="})) {
return true;
}
if (node->IsCallable("Member")) {
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg b/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg
new file mode 100644
index 00000000000..0978582140a
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.cfg
@@ -0,0 +1 @@
+in Input integer_optional.txt
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql b/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql
new file mode 100644
index 00000000000..6faf68b3156
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.sql
@@ -0,0 +1,10 @@
+pragma yt.UseQLFilter;
+
+select *
+from plato.Input
+where
+ not (a <= 5)
+ and
+ b < 0
+ and
+ c > Just(5);
\ No newline at end of file
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt
new file mode 100644
index 00000000000..36a0341a3e9
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt
@@ -0,0 +1,10 @@
+{"a"=1;"b"=1;"c"=1u;"d"=1;"e"=1u};
+{"a"=#;"b"=-2;"c"=2u;"d"=10;"e"=10u};
+{"a"=-3;"b"=3;"c"=3u;"d"=-100;"e"=100u};
+{"a"=4;"b"=#;"c"=4u;"d"=1000;"e"=1000u};
+{"a"=5;"b"=-5;"c"=#;"d"=10000;"e"=10000u};
+{"a"=-6;"b"=6;"c"=6u;"d"=-100000;"e"=100000u};
+{"a"=#;"b"=7;"c"=7u;"d"=1000000;"e"=1000000u};
+{"a"=8;"b"=-8;"c"=8u;"d"=#;"e"=10000000u};
+{"a"=-9;"b"=9;"c"=#;"d"=-100000000;"e"=100000000u};
+{"a"=10;"b"=#;"c"=10u;"d"=1000000000;"e"=#};
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr
new file mode 100644
index 00000000000..b4567c7b245
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional.txt.attr
@@ -0,0 +1,11 @@
+{
+ "_yql_row_spec"={
+ "Type"=["StructType";[
+ ["a";["OptionalType";["DataType";"Int32"]]];
+ ["b";["OptionalType";["DataType";"Int32"]]];
+ ["c";["OptionalType";["DataType";"Uint32"]]];
+ ["d";["OptionalType";["DataType";"Int64"]]];
+ ["e";["OptionalType";["DataType";"Uint64"]]]
+ ]]
+ }
+}
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg
new file mode 100644
index 00000000000..0978582140a
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.cfg
@@ -0,0 +1 @@
+in Input integer_optional.txt
diff --git a/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql
new file mode 100644
index 00000000000..4ad8a40b212
--- /dev/null
+++ b/yt/yql/tests/sql/suites/ql_filter/integer_optional_null.sql
@@ -0,0 +1,11 @@
+pragma yt.UseQLFilter;
+
+select *
+from plato.Input
+where
+ (
+ a > null
+ or b > nothing(Int32?)
+ or c is null
+ )
+ and (c > 0) is null;
\ No newline at end of file