diff options
author | zverevgeny <[email protected]> | 2025-04-25 13:47:32 +0300 |
---|---|---|
committer | zverevgeny <[email protected]> | 2025-04-25 14:34:55 +0300 |
commit | cecedbe2b29df37e09409fc2d15fb8211269d8b1 (patch) | |
tree | 5a03c7812ce3b54e22244e8a2740930f03bdf232 /yql/essentials/sql/v1 | |
parent | eadbc0700cb45e9d5706f48f9a233c1b6016433d (diff) |
Primitives for case insensitive simple pattern match
commit_hash:5f4bdb090c2f60459073e3e95ccd39ec58b95232
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- | yql/essentials/sql/v1/context.cpp | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/context.h | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/node.cpp | 5 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_expression.cpp | 20 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_query.cpp | 6 |
5 files changed, 25 insertions, 8 deletions
diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp index ae313a171e1..9decaefd4e9 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -71,6 +71,7 @@ THashMap<TStringBuf, TPragmaField> CTX_PRAGMA_FIELDS = { {"DistinctOverKeys", &TContext::DistinctOverKeys}, {"GroupByExprAfterWhere", &TContext::GroupByExprAfterWhere}, {"FailOnGroupByExprOverride", &TContext::FailOnGroupByExprOverride}, + {"OptimizeSimpleILIKE", &TContext::OptimizeSimpleIlike} }; typedef TMaybe<bool> TContext::*TPragmaMaybeField; diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h index 14314c0a314..e45a0495a2c 100644 --- a/yql/essentials/sql/v1/context.h +++ b/yql/essentials/sql/v1/context.h @@ -376,6 +376,7 @@ namespace NSQLTranslationV1 { bool GroupByExprAfterWhere = false; bool FailOnGroupByExprOverride = false; bool EmitUnionMerge = false; + bool OptimizeSimpleIlike = false; TVector<size_t> ForAllStatementsParts; TMaybe<TString> Engine; diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp index e5c7d353795..fe5d4dff7b0 100644 --- a/yql/essentials/sql/v1/node.cpp +++ b/yql/essentials/sql/v1/node.cpp @@ -3179,7 +3179,10 @@ TNodePtr BuildBinaryOp(TContext& ctx, TPosition pos, const TString& opName, TNod return nullptr; } - static const THashSet<TStringBuf> nullSafeOps = {"IsDistinctFrom", "IsNotDistinctFrom"}; + static const THashSet<TStringBuf> nullSafeOps = { + "IsDistinctFrom", "IsNotDistinctFrom", + "EqualsIgnoreCase", "StartsWithIgnoreCase", "EndsWithIgnoreCase", "StringContainsIgnoreCase" + }; if (!nullSafeOps.contains(opName)) { const bool bothArgNull = a->IsNull() && b->IsNull(); const bool oneArgNull = a->IsNull() || b->IsNull(); diff --git a/yql/essentials/sql/v1/sql_expression.cpp b/yql/essentials/sql/v1/sql_expression.cpp index 3eb1904cc59..4b7e31fdaa3 100644 --- a/yql/essentials/sql/v1/sql_expression.cpp +++ b/yql/essentials/sql/v1/sql_expression.cpp @@ -1797,19 +1797,25 @@ TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQ return nullptr; } - if (opName == "like" || mayIgnoreCase) { + if ((opName == "like") || mayIgnoreCase || Ctx.OptimizeSimpleIlike) { // TODO: expand LIKE in optimizers - we can analyze argument types there + const bool useIgnoreCaseOp = (opName == "ilike") && !mayIgnoreCase; + const auto& equalOp = useIgnoreCaseOp ? "EqualsIgnoreCase" : "=="; + const auto& startsWithOp = useIgnoreCaseOp ? "StartsWithIgnoreCase" : "StartsWith"; + const auto& endsWithOp = useIgnoreCaseOp ? "EndsWithIgnoreCase" : "EndsWith"; + const auto& containsOp = useIgnoreCaseOp ? "StringContainsIgnoreCase" : "StringContains"; + YQL_ENSURE(!components.empty()); const auto& first = components.front(); if (components.size() == 1 && first.IsSimple) { // no '%'s and '_'s in pattern YQL_ENSURE(first.Prefix == first.Suffix); - isMatch = BuildBinaryOp(Ctx, pos, "==", res, BuildLiteralRawString(pos, first.Suffix, isUtf8)); + isMatch = BuildBinaryOp(Ctx, pos, equalOp, res, BuildLiteralRawString(pos, first.Suffix, isUtf8)); } else if (!first.Prefix.empty()) { const TString& prefix = first.Prefix; TNodePtr prefixMatch; if (Ctx.EmitStartsWith) { - prefixMatch = BuildBinaryOp(Ctx, pos, "StartsWith", res, BuildLiteralRawString(pos, prefix, isUtf8)); + prefixMatch = BuildBinaryOp(Ctx, pos, startsWithOp, res, BuildLiteralRawString(pos, prefix, isUtf8)); } else { prefixMatch = BuildBinaryOp(Ctx, pos, ">=", res, BuildLiteralRawString(pos, prefix, isUtf8)); auto upperBound = isUtf8 ? NextValidUtf8(prefix) : NextLexicographicString(prefix); @@ -1834,7 +1840,7 @@ TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQ TNodePtr sizePred = BuildBinaryOp(Ctx, pos, ">=", TNodePtr(new TCallNodeImpl(pos, "Size", { res })), TNodePtr(new TLiteralNumberNode<ui32>(pos, "Uint32", ToString(prefix.size() + suffix.size())))); - TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8)); + TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, endsWithOp, res, BuildLiteralRawString(pos, suffix, isUtf8)); isMatch = new TCallNodeImpl(pos, "And", { sizePred, prefixMatch, @@ -1849,14 +1855,14 @@ TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQ if (components.size() == 3 && components.back().Prefix.empty()) { // '%foo%' YQL_ENSURE(!components[1].Prefix.empty()); - isMatch = BuildBinaryOp(Ctx, pos, "StringContains", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); + isMatch = BuildBinaryOp(Ctx, pos, containsOp, res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); } else if (components.size() == 2) { // '%foo' - isMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); + isMatch = BuildBinaryOp(Ctx, pos, endsWithOp, res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); } } else if (Ctx.AnsiLike && !components.back().Suffix.empty()) { const TString& suffix = components.back().Suffix; - TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8)); + TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, endsWithOp, res, BuildLiteralRawString(pos, suffix, isUtf8)); isMatch = BuildBinaryOp(Ctx, pos, "And", suffixMatch, isMatch); } // TODO: more StringContains/StartsWith/EndsWith cases? diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp index c9ea302c8c7..a750d941917 100644 --- a/yql/essentials/sql/v1/sql_query.cpp +++ b/yql/essentials/sql/v1/sql_query.cpp @@ -3434,6 +3434,12 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success } Ctx.Engine = *literal; + } else if (normalizedPragma == "optimizesimpleilike") { + Ctx.OptimizeSimpleIlike = true; + Ctx.IncrementMonCounter("sql_pragma", "OptimizeSimpleILIKE"); + } else if (normalizedPragma == "disableoptimizesimpleilike") { + Ctx.OptimizeSimpleIlike = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableOptimizeSimpleILIKE"); } else { Error() << "Unknown pragma: " << pragma; Ctx.IncrementMonCounter("sql_errors", "UnknownPragma"); |