about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: chertus <azuikov@ydb.tech> 2023-03-14 21:55:04 +0300
committer: chertus <azuikov@ydb.tech> 2023-03-14 21:55:04 +0300
commit: 12266429be663771b4fae57b9257645f26d9e94b (patch)
tree: 4cd22cc7b0137dbb96ea245acb54fb0728fbe645
parent: 524d24d24566e8d43e18525cb5c8ee18ea712356 (diff)
download: ydb-12266429be663771b4fae57b9257645f26d9e94b.tar.gz
case-insensitive LIKEs in SSA
-rw-r--r-- ydb/core/formats/ut_program_step.cpp | 27
-rw-r--r-- ydb/core/protos/ssa.proto | 3
-rw-r--r-- ydb/core/tx/columnshard/columnshard_common.cpp | 30
-rw-r--r-- ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp | 13
4 files changed, 63 insertions, 10 deletions
diff --git a/ydb/core/formats/ut_program_step.cpp b/ydb/core/formats/ut_program_step.cpp
index 1f2ae885214..30314eb642b 100644
--- a/ydb/core/formats/ut_program_step.cpp
+++ b/ydb/core/formats/ut_program_step.cpp
@@ -420,6 +420,33 @@ Y_UNIT_TEST_SUITE(ProgramStep) {
UNIT_ASSERT_VALUES_EQUAL(res[3], false);
}
+ Y_UNIT_TEST(StartsWithIgnoreCase) {
+ std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::StartsWith, "aA", true);
+ UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+ UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[1], false);
+ UNIT_ASSERT_VALUES_EQUAL(res[2], false);
+ UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+ }
+
+ Y_UNIT_TEST(EndsWithIgnoreCase) {
+ std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::EndsWith, "aA", true);
+ UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+ UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[1], false);
+ UNIT_ASSERT_VALUES_EQUAL(res[2], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+ }
+
+ Y_UNIT_TEST(MatchSubstringIgnoreCase) {
+ std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::MatchSubstring, "aA", true);
+ UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+ UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[1], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[2], true);
+ UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+ }
+
Y_UNIT_TEST(ScalarTest) {
auto schema = std::make_shared<arrow::Schema>(std::vector{
std::make_shared<arrow::Field>("x", arrow::int64()),
diff --git a/ydb/core/protos/ssa.proto b/ydb/core/protos/ssa.proto
index 9ba916f1be5..eaa8891c41d 100644
--- a/ydb/core/protos/ssa.proto
+++ b/ydb/core/protos/ssa.proto
@@ -77,6 +77,9 @@ message TProgram {
FUNC_STR_MATCH_LIKE = 32;
FUNC_STR_STARTS_WITH = 33;
FUNC_STR_ENDS_WITH = 34;
+ FUNC_STR_MATCH_IGNORE_CASE = 35;
+ FUNC_STR_STARTS_WITH_IGNORE_CASE = 36;
+ FUNC_STR_ENDS_WITH_IGNORE_CASE = 37;
}
message TFunction {
diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp
index ff7ab88e9a5..b49a536e200 100644
--- a/ydb/core/tx/columnshard/columnshard_common.cpp
+++ b/ydb/core/tx/columnshard/columnshard_common.cpp
@@ -90,7 +90,7 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
return castOpts;
};
- auto mkLikeOptions = [&]() {
+ auto mkLikeOptions = [&](bool ignoreCase) {
if (arguments.size() != 2 || !info.Constants.count(arguments[1])) {
return std::shared_ptr<arrow::compute::MatchSubstringOptions>();
}
@@ -100,7 +100,7 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
}
arguments.resize(1);
auto& pattern = static_cast<arrow::BaseBinaryScalar&>(*patternScalar).value;
- return std::make_shared<arrow::compute::MatchSubstringOptions>(pattern->ToString()); // TODO: case-insensitive
+ return std::make_shared<arrow::compute::MatchSubstringOptions>(pattern->ToString(), ignoreCase);
};
switch (func.GetId()) {
@@ -121,25 +121,43 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
case TId::FUNC_STR_LENGTH:
return TAssign(name, EOperation::BinaryLength, std::move(arguments));
case TId::FUNC_STR_MATCH: {
- if (auto opts = mkLikeOptions()) {
+ if (auto opts = mkLikeOptions(false)) {
return TAssign(name, EOperation::MatchSubstring, std::move(arguments), opts);
}
break;
}
case TId::FUNC_STR_MATCH_LIKE: {
- if (auto opts = mkLikeOptions()) {
+ if (auto opts = mkLikeOptions(false)) {
return TAssign(name, EOperation::MatchLike, std::move(arguments), opts);
}
break;
}
case TId::FUNC_STR_STARTS_WITH: {
- if (auto opts = mkLikeOptions()) {
+ if (auto opts = mkLikeOptions(false)) {
return TAssign(name, EOperation::StartsWith, std::move(arguments), opts);
}
break;
}
case TId::FUNC_STR_ENDS_WITH: {
- if (auto opts = mkLikeOptions()) {
+ if (auto opts = mkLikeOptions(false)) {
+ return TAssign(name, EOperation::EndsWith, std::move(arguments), opts);
+ }
+ break;
+ }
+ case TId::FUNC_STR_MATCH_IGNORE_CASE: {
+ if (auto opts = mkLikeOptions(true)) {
+ return TAssign(name, EOperation::MatchSubstring, std::move(arguments), opts);
+ }
+ break;
+ }
+ case TId::FUNC_STR_STARTS_WITH_IGNORE_CASE: {
+ if (auto opts = mkLikeOptions(true)) {
+ return TAssign(name, EOperation::StartsWith, std::move(arguments), opts);
+ }
+ break;
+ }
+ case TId::FUNC_STR_ENDS_WITH_IGNORE_CASE: {
+ if (auto opts = mkLikeOptions(true)) {
return TAssign(name, EOperation::EndsWith, std::move(arguments), opts);
}
break;
diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
index bc226e43a4a..4cfae6e9d4b 100644
--- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
+++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
@@ -1165,7 +1165,6 @@ static NKikimrSSA::TProgram MakeSelect(TAssignment::EFunction compareId = TAssig
}
// SELECT level, timestamp FROM t WHERE likeFunc(timestamp, pattern)
-// FUNC_STR_MATCH, FUNC_STR_STARTS_WITH, FUNC_STR_ENDS_WITH
static NKikimrSSA::TProgram MakeSelectLike(TAssignment::EFunction likeId, const TString& pattern) {
NKikimrSSA::TProgram ssa;
@@ -1467,8 +1466,11 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) {
TString pattern = "1";
std::vector<NKikimrSSA::TProgram> ssas = {
MakeSelectLike(TAssignment::FUNC_STR_MATCH, pattern),
+ MakeSelectLike(TAssignment::FUNC_STR_MATCH_IGNORE_CASE, pattern),
MakeSelectLike(TAssignment::FUNC_STR_STARTS_WITH, pattern),
- MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH, pattern)
+ MakeSelectLike(TAssignment::FUNC_STR_STARTS_WITH_IGNORE_CASE, pattern),
+ MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH, pattern),
+ MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH_IGNORE_CASE, pattern)
};
ui32 i = 0;
@@ -1509,12 +1511,15 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) {
switch (i) {
case 0:
+ case 1:
UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 19));
break;
- case 1:
+ case 2:
+ case 3:
UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 11));
break;
- case 2:
+ case 4:
+ case 5:
UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 10));
break;
default: