summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author aidarsamer <[email protected]> 2023-04-12 11:33:47 +0300
committer aidarsamer <[email protected]> 2023-04-12 11:33:47 +0300
commit aa0f4daea45136f6cf6a92e1398a20f090592c31 (patch)
tree ceff9fa392700ccf19b81770000e517219ead046
parent 0e71846f867fa1d8474388f5e93235b90e2067a6 (diff)
Temporarily disable binary String data type for LIKE pushdown to Column Shards.
-rw-r--r-- ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp | 39
-rw-r--r-- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 23
-rw-r--r-- ydb/core/testlib/cs_helper.cpp | 10
-rw-r--r-- ydb/core/testlib/cs_helper.h | 1
4 files changed, 64 insertions, 9 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp
index 2719be84242..b9312668a0a 100644
--- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp
+++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp
@@ -13,6 +13,33 @@ using namespace NYql::NNodes;
namespace {
+// Returns true when `expr` is a column reference (TCoMember) whose data type
+// is the binary `String` type.
+//
+// Returns false for any expression that is not a TCoMember, and for members
+// whose (item) type annotation is not of kind Data.
+bool ColumnHasBinaryStringType(const TExprBase& expr) {
+    if (!expr.Maybe<TCoMember>()) {
+        return false;
+    }
+    auto typeAnn = expr.Ptr()->GetTypeAnn();
+    // Use the item type when GetSeqItemType yields one; otherwise inspect the
+    // annotation of the expression itself.
+    auto itemType = GetSeqItemType(typeAnn);
+    if (!itemType) {
+        itemType = typeAnn;
+    }
+    if (itemType->GetKind() != ETypeAnnotationKind::Data) {
+        return false;
+    }
+    // Compare the data slot directly instead of materializing the type name as
+    // a std::string: no allocation, no reliance on the name view being
+    // NUL-terminated, and no string literal to mistype.
+    return itemType->Cast<TDataExprType>()->GetSlot() == NUdf::EDataSlot::String;
+}
+
+// Returns true when `predicate` is one of the string-matching comparisons
+// treated as LIKE operators here: TCoCmpStringContains, TCoCmpStartsWith or
+// TCoCmpEndsWith.
+bool IsLikeOperator(const TCoCompare& predicate) {
+    const bool isContains = bool(predicate.Maybe<TCoCmpStringContains>());
+    const bool isPrefix = !isContains && bool(predicate.Maybe<TCoCmpStartsWith>());
+    const bool isSuffix = !isContains && !isPrefix && bool(predicate.Maybe<TCoCmpEndsWith>());
+    return isContains || isPrefix || isSuffix;
+}
+
bool IsSupportedPredicate(const TCoCompare& predicate) {
if (predicate.Maybe<TCoCmpEqual>()) {
return true;
@@ -30,13 +57,7 @@ bool IsSupportedPredicate(const TCoCompare& predicate) {
return true;
} else if (NKikimr::NSsa::RuntimeVersion >= 2U) {
// We introduced LIKE pushdown in v2 of SSA program
- if (predicate.Maybe<TCoCmpStringContains>()) {
- return true;
- } else if (predicate.Maybe<TCoCmpStartsWith>()) {
- return true;
- } else if (predicate.Maybe<TCoCmpEndsWith>()) {
- return true;
- }
+ return IsLikeOperator(predicate);
}
return false;
@@ -273,6 +294,10 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
if (!IsComparableTypes(leftList[i], rightList[i], equality, inputType)) {
return false;
}
+ if (IsLikeOperator(compare) && (ColumnHasBinaryStringType(leftList[i]) || ColumnHasBinaryStringType(rightList[i]))) {
+ // Currently Column Shard doesn't have LIKE kernel for binary strings
+ return false;
+ }
}
return true;
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index b3d7b6be54c..683fec54d88 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -1406,6 +1406,29 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
}
#endif
+ // Runs an explain-only scan query with a LIKE predicate over the `binary_str`
+ // column and checks that the resulting query AST contains no "KqpOlapFilter".
+ Y_UNIT_TEST(PredicatePushdown_LikeNotPushedDownForStringType) {
+     auto runnerSettings = TKikimrSettings()
+         .SetWithSampleTables(false);
+     TKikimrRunner kikimr(runnerSettings);
+
+     // Create the table with nullable columns and fill it with test rows.
+     TTableWithNullsHelper(kikimr).CreateTableWithNulls();
+     WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls");
+     EnableDebugLogging(kikimr);
+
+     // Request the explain output so the AST can be inspected afterwards.
+     TStreamExecScanQuerySettings explainSettings;
+     explainSettings.Explain(true);
+
+     auto query = R"(SELECT id, binary_str FROM `/Root/tableWithNulls` WHERE binary_str LIKE "5%")";
+     auto client = kikimr.GetTableClient();
+     auto it = client.StreamExecuteScanQuery(query, explainSettings).GetValueSync();
+     UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString());
+
+     auto collected = CollectStreamResult(it);
+     auto ast = collected.QueryStats->Getquery_ast();
+     // The filter node must be absent from the explained AST.
+     UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos,
+                   TStringBuilder() << "Predicate pushed down. Query: " << query);
+ }
+
Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false);
diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp
index 0a7374d8d7c..990f6b7aede 100644
--- a/ydb/core/testlib/cs_helper.cpp
+++ b/ydb/core/testlib/cs_helper.cpp
@@ -356,7 +356,8 @@ std::shared_ptr<arrow::Schema> TTableWithNullsHelper::GetArrowSchema() {
std::vector<std::shared_ptr<arrow::Field>>{
arrow::field("id", arrow::int32()),
arrow::field("resource_id", arrow::utf8()),
- arrow::field("level", arrow::int32())
+ arrow::field("level", arrow::int32()),
+ arrow::field("binary_str", arrow::binary())
});
}
@@ -371,28 +372,33 @@ std::shared_ptr<arrow::RecordBatch> TTableWithNullsHelper::TestArrowBatch(ui64,
arrow::Int32Builder b1;
arrow::StringBuilder b2;
arrow::Int32Builder b3;
+ arrow::StringBuilder b4;
for (size_t i = 1; i <= rowCount / 2; ++i) {
Y_VERIFY(b1.Append(i).ok());
Y_VERIFY(b2.AppendNull().ok());
Y_VERIFY(b3.Append(i).ok());
+ Y_VERIFY(b4.AppendNull().ok());
}
for (size_t i = rowCount / 2 + 1; i <= rowCount; ++i) {
Y_VERIFY(b1.Append(i).ok());
Y_VERIFY(b2.Append(std::to_string(i)).ok());
Y_VERIFY(b3.AppendNull().ok());
+ Y_VERIFY(b4.Append(std::to_string(i)).ok());
}
std::shared_ptr<arrow::Int32Array> a1;
std::shared_ptr<arrow::StringArray> a2;
std::shared_ptr<arrow::Int32Array> a3;
+ std::shared_ptr<arrow::StringArray> a4;
Y_VERIFY(b1.Finish(&a1).ok());
Y_VERIFY(b2.Finish(&a2).ok());
Y_VERIFY(b3.Finish(&a3).ok());
+ Y_VERIFY(b4.Finish(&a4).ok());
- return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3 });
+ return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4 });
}
}
diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h
index b171f660519..cc76cb33976 100644
--- a/ydb/core/testlib/cs_helper.h
+++ b/ydb/core/testlib/cs_helper.h
@@ -182,6 +182,7 @@ public:
Columns { Name: "id" Type: "Int32" NotNull: true }
Columns { Name: "resource_id" Type: "Utf8" }
Columns { Name: "level" Type: "Int32" }
+ Columns { Name: "binary_str" Type: "String" }
KeyColumnNames: "id"
)";