diff options
author | chertus <azuikov@ydb.tech> | 2023-03-14 21:55:04 +0300 |
---|---|---|
committer | chertus <azuikov@ydb.tech> | 2023-03-14 21:55:04 +0300 |
commit | 12266429be663771b4fae57b9257645f26d9e94b (patch) | |
tree | 4cd22cc7b0137dbb96ea245acb54fb0728fbe645 | |
parent | 524d24d24566e8d43e18525cb5c8ee18ea712356 (diff) | |
download | ydb-12266429be663771b4fae57b9257645f26d9e94b.tar.gz |
case-insensitive LIKEs in SSA
-rw-r--r-- | ydb/core/formats/ut_program_step.cpp | 27 | ||||
-rw-r--r-- | ydb/core/protos/ssa.proto | 3 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard_common.cpp | 30 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp | 13 |
4 files changed, 63 insertions, 10 deletions
diff --git a/ydb/core/formats/ut_program_step.cpp b/ydb/core/formats/ut_program_step.cpp
index 1f2ae885214..30314eb642b 100644
--- a/ydb/core/formats/ut_program_step.cpp
+++ b/ydb/core/formats/ut_program_step.cpp
@@ -420,6 +420,33 @@ Y_UNIT_TEST_SUITE(ProgramStep) {
         UNIT_ASSERT_VALUES_EQUAL(res[3], false);
     }
 
+    Y_UNIT_TEST(StartsWithIgnoreCase) {
+        std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::StartsWith, "aA", true);
+        UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+        UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[1], false);
+        UNIT_ASSERT_VALUES_EQUAL(res[2], false);
+        UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+    }
+
+    Y_UNIT_TEST(EndsWithIgnoreCase) {
+        std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::EndsWith, "aA", true);
+        UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+        UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[1], false);
+        UNIT_ASSERT_VALUES_EQUAL(res[2], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+    }
+
+    Y_UNIT_TEST(MatchSubstringIgnoreCase) {
+        std::vector<bool> res = LikeTest({"Aa", "abAaba", "baA", ""}, EOperation::MatchSubstring, "aA", true);
+        UNIT_ASSERT_VALUES_EQUAL(res.size(), 4);
+        UNIT_ASSERT_VALUES_EQUAL(res[0], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[1], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[2], true);
+        UNIT_ASSERT_VALUES_EQUAL(res[3], false);
+    }
+
     Y_UNIT_TEST(ScalarTest) {
         auto schema = std::make_shared<arrow::Schema>(std::vector{
             std::make_shared<arrow::Field>("x", arrow::int64()),
diff --git a/ydb/core/protos/ssa.proto b/ydb/core/protos/ssa.proto
index 9ba916f1be5..eaa8891c41d 100644
--- a/ydb/core/protos/ssa.proto
+++ b/ydb/core/protos/ssa.proto
@@ -77,6 +77,9 @@ message TProgram {
             FUNC_STR_MATCH_LIKE = 32;
             FUNC_STR_STARTS_WITH = 33;
             FUNC_STR_ENDS_WITH = 34;
+            FUNC_STR_MATCH_IGNORE_CASE = 35;
+            FUNC_STR_STARTS_WITH_IGNORE_CASE = 36;
+            FUNC_STR_ENDS_WITH_IGNORE_CASE = 37;
         }
 
         message TFunction {
diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp
index ff7ab88e9a5..b49a536e200 100644
--- a/ydb/core/tx/columnshard/columnshard_common.cpp
+++ b/ydb/core/tx/columnshard/columnshard_common.cpp
@@ -90,7 +90,7 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
         return castOpts;
     };
 
-    auto mkLikeOptions = [&]() {
+    auto mkLikeOptions = [&](bool ignoreCase) {
         if (arguments.size() != 2 || !info.Constants.count(arguments[1])) {
             return std::shared_ptr<arrow::compute::MatchSubstringOptions>();
         }
@@ -100,7 +100,7 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
         }
         arguments.resize(1);
         auto& pattern = static_cast<arrow::BaseBinaryScalar&>(*patternScalar).value;
-        return std::make_shared<arrow::compute::MatchSubstringOptions>(pattern->ToString()); // TODO: case-insensitive
+        return std::make_shared<arrow::compute::MatchSubstringOptions>(pattern->ToString(), ignoreCase);
     };
 
     switch (func.GetId()) {
@@ -121,25 +121,43 @@ TAssign MakeFunction(const TContext& info, const std::string& name,
         case TId::FUNC_STR_LENGTH:
             return TAssign(name, EOperation::BinaryLength, std::move(arguments));
         case TId::FUNC_STR_MATCH: {
-            if (auto opts = mkLikeOptions()) {
+            if (auto opts = mkLikeOptions(false)) {
                 return TAssign(name, EOperation::MatchSubstring, std::move(arguments), opts);
             }
             break;
         }
         case TId::FUNC_STR_MATCH_LIKE: {
-            if (auto opts = mkLikeOptions()) {
+            if (auto opts = mkLikeOptions(false)) {
                 return TAssign(name, EOperation::MatchLike, std::move(arguments), opts);
             }
             break;
         }
         case TId::FUNC_STR_STARTS_WITH: {
-            if (auto opts = mkLikeOptions()) {
+            if (auto opts = mkLikeOptions(false)) {
                 return TAssign(name, EOperation::StartsWith, std::move(arguments), opts);
             }
             break;
         }
         case TId::FUNC_STR_ENDS_WITH: {
-            if (auto opts = mkLikeOptions()) {
+            if (auto opts = mkLikeOptions(false)) {
+                return TAssign(name, EOperation::EndsWith, std::move(arguments), opts);
+            }
+            break;
+        }
+        case TId::FUNC_STR_MATCH_IGNORE_CASE: {
+            if (auto opts = mkLikeOptions(true)) {
+                return TAssign(name, EOperation::MatchSubstring, std::move(arguments), opts);
+            }
+            break;
+        }
+        case TId::FUNC_STR_STARTS_WITH_IGNORE_CASE: {
+            if (auto opts = mkLikeOptions(true)) {
+                return TAssign(name, EOperation::StartsWith, std::move(arguments), opts);
+            }
+            break;
+        }
+        case TId::FUNC_STR_ENDS_WITH_IGNORE_CASE: {
+            if (auto opts = mkLikeOptions(true)) {
                 return TAssign(name, EOperation::EndsWith, std::move(arguments), opts);
             }
             break;
diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
index bc226e43a4a..4cfae6e9d4b 100644
--- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
+++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp
@@ -1165,7 +1165,6 @@ static NKikimrSSA::TProgram MakeSelect(TAssignment::EFunction compareId = TAssig
 }
 
 // SELECT level, timestamp FROM t WHERE likeFunc(timestamp, pattern)
-// FUNC_STR_MATCH, FUNC_STR_STARTS_WITH, FUNC_STR_ENDS_WITH
 static NKikimrSSA::TProgram MakeSelectLike(TAssignment::EFunction likeId, const TString& pattern) {
     NKikimrSSA::TProgram ssa;
 
@@ -1467,8 +1466,11 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) {
     TString pattern = "1";
     std::vector<NKikimrSSA::TProgram> ssas = {
         MakeSelectLike(TAssignment::FUNC_STR_MATCH, pattern),
+        MakeSelectLike(TAssignment::FUNC_STR_MATCH_IGNORE_CASE, pattern),
         MakeSelectLike(TAssignment::FUNC_STR_STARTS_WITH, pattern),
-        MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH, pattern)
+        MakeSelectLike(TAssignment::FUNC_STR_STARTS_WITH_IGNORE_CASE, pattern),
+        MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH, pattern),
+        MakeSelectLike(TAssignment::FUNC_STR_ENDS_WITH_IGNORE_CASE, pattern)
     };
 
     ui32 i = 0;
@@ -1509,12 +1511,15 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) {
 
             switch (i) {
                 case 0:
+                case 1:
                     UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 19));
                     break;
-                case 1:
+                case 2:
+                case 3:
                     UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 11));
                     break;
-                case 2:
+                case 4:
+                case 5:
                     UNIT_ASSERT(CheckColumns(readData, meta, {"message"}, 10));
                     break;
                 default: