diff options
author | aidarsamer <[email protected]> | 2023-04-12 11:33:47 +0300 |
---|---|---|
committer | aidarsamer <[email protected]> | 2023-04-12 11:33:47 +0300 |
commit | aa0f4daea45136f6cf6a92e1398a20f090592c31 (patch) | |
tree | ceff9fa392700ccf19b81770000e517219ead046 | |
parent | 0e71846f867fa1d8474388f5e93235b90e2067a6 (diff) |
Temporarily disable binary String data type for LIKE pushdown to Column Shards.
-rw-r--r-- | ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp | 39 | ||||
-rw-r--r-- | ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 23 | ||||
-rw-r--r-- | ydb/core/testlib/cs_helper.cpp | 10 | ||||
-rw-r--r-- | ydb/core/testlib/cs_helper.h | 1 |
4 files changed, 64 insertions, 9 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp index 2719be84242..b9312668a0a 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp @@ -13,6 +13,33 @@ using namespace NYql::NNodes; namespace { +bool ColumnHasBinaryStringType(const TExprBase& expr) { + if (!expr.Maybe<TCoMember>()) { + return false; + } + auto typeAnn = expr.Ptr()->GetTypeAnn(); + auto itemType = GetSeqItemType(typeAnn); + if (!itemType) { + itemType = typeAnn; + } + if (itemType->GetKind() != ETypeAnnotationKind::Data) { + return false; + } + auto dataTypeInfo = NUdf::GetDataTypeInfo(itemType->Cast<TDataExprType>()->GetSlot()); + return (std::string(dataTypeInfo.Name.data()) == "String"); +} + +bool IsLikeOperator(const TCoCompare& predicate) { + if (predicate.Maybe<TCoCmpStringContains>()) { + return true; + } else if (predicate.Maybe<TCoCmpStartsWith>()) { + return true; + } else if (predicate.Maybe<TCoCmpEndsWith>()) { + return true; + } + return false; +} + bool IsSupportedPredicate(const TCoCompare& predicate) { if (predicate.Maybe<TCoCmpEqual>()) { return true; @@ -30,13 +57,7 @@ bool IsSupportedPredicate(const TCoCompare& predicate) { return true; } else if (NKikimr::NSsa::RuntimeVersion >= 2U) { // We introduced LIKE pushdown in v2 of SSA program - if (predicate.Maybe<TCoCmpStringContains>()) { - return true; - } else if (predicate.Maybe<TCoCmpStartsWith>()) { - return true; - } else if (predicate.Maybe<TCoCmpEndsWith>()) { - return true; - } + return IsLikeOperator(predicate); } return false; @@ -273,6 +294,10 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr if (!IsComparableTypes(leftList[i], rightList[i], equality, inputType)) { return false; } + if (IsLikeOperator(compare) && (ColumnHasBinaryStringType(leftList[i]) || ColumnHasBinaryStringType(rightList[i]))) { + // Currently Column Shard doesn't have LIKE kernel for binary strings + return false; + } } return true; diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index b3d7b6be54c..683fec54d88 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1406,6 +1406,29 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } #endif + Y_UNIT_TEST(PredicatePushdown_LikeNotPushedDownForStringType) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); + EnableDebugLogging(kikimr); + + auto tableClient = kikimr.GetTableClient(); + auto query = R"(SELECT id, binary_str FROM `/Root/tableWithNulls` WHERE binary_str LIKE "5%")"; + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << query); + } + Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) { auto settings = TKikimrSettings() .SetWithSampleTables(false); diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index 0a7374d8d7c..990f6b7aede 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -356,7 +356,8 @@ std::shared_ptr<arrow::Schema> TTableWithNullsHelper::GetArrowSchema() { std::vector<std::shared_ptr<arrow::Field>>{ arrow::field("id", arrow::int32()), arrow::field("resource_id", arrow::utf8()), - arrow::field("level", arrow::int32()) + arrow::field("level", arrow::int32()), + arrow::field("binary_str", arrow::binary()) }); } @@ -371,28 +372,33 @@ std::shared_ptr<arrow::RecordBatch> TTableWithNullsHelper::TestArrowBatch(ui64, arrow::Int32Builder b1; arrow::StringBuilder b2; arrow::Int32Builder b3; + arrow::StringBuilder b4; for (size_t i = 1; i <= rowCount / 2; ++i) { Y_VERIFY(b1.Append(i).ok()); Y_VERIFY(b2.AppendNull().ok()); Y_VERIFY(b3.Append(i).ok()); + Y_VERIFY(b4.AppendNull().ok()); } for (size_t i = rowCount / 2 + 1; i <= rowCount; ++i) { Y_VERIFY(b1.Append(i).ok()); Y_VERIFY(b2.Append(std::to_string(i)).ok()); Y_VERIFY(b3.AppendNull().ok()); + Y_VERIFY(b4.Append(std::to_string(i)).ok()); } std::shared_ptr<arrow::Int32Array> a1; std::shared_ptr<arrow::StringArray> a2; std::shared_ptr<arrow::Int32Array> a3; + std::shared_ptr<arrow::StringArray> a4; Y_VERIFY(b1.Finish(&a1).ok()); Y_VERIFY(b2.Finish(&a2).ok()); Y_VERIFY(b3.Finish(&a3).ok()); + Y_VERIFY(b4.Finish(&a4).ok()); - return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3 }); + return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4 }); } } diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index b171f660519..cc76cb33976 100644 --- a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -182,6 +182,7 @@ public: Columns { Name: "id" Type: "Int32" NotNull: true } Columns { Name: "resource_id" Type: "Utf8" } Columns { Name: "level" Type: "Int32" } + Columns { Name: "binary_str" Type: "String" } KeyColumnNames: "id" )"; |