diff options
| author | ssmike <[email protected]> | 2023-11-17 20:23:29 +0300 |
|---|---|---|
| committer | ssmike <[email protected]> | 2023-11-17 21:05:54 +0300 |
| commit | 03f188111211d64677ea001d0f1ded8e392b9d27 (patch) | |
| tree | 3039c1ae954eefc4d5a99fcdefdf5005b6dda9ff | |
| parent | 86cd90095ae579c072aabe74272211905395b863 (diff) | |
Fix unsafe conversions
YQL-16933
| -rw-r--r-- | ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp | 109 | ||||
| -rw-r--r-- | ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp | 1 | ||||
| -rw-r--r-- | ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp | 137 | ||||
| -rw-r--r-- | ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp | 302 | ||||
| -rw-r--r-- | ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp | 120 |
5 files changed, 549 insertions, 120 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp index 59141ae6667..85939d28575 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp @@ -378,7 +378,9 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext auto rightRead = rightReadMatch->Read.template Cast<ReadType>(); TMaybeNode<TCoAtomList> lookupColumns; - TMaybe<TKqlKeyInc> rightTableKeyPrefix; + size_t rightPrefixSize; + TMaybeNode<TExprBase> rightPrefixExpr; + if constexpr (std::is_same_v<ReadType, TKqlReadTableBase>) { Y_ENSURE(rightRead.template Maybe<TKqlReadTable>() || rightRead.template Maybe<TKqlReadTableIndex>()); const TKqlReadTableBase read = rightRead; @@ -393,7 +395,18 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } lookupColumns = read.Columns(); - rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast(); + + rightPrefixSize = maybeRightTableKeyPrefix.Cast().ArgCount(); + TVector<TExprBase> columns; + for (auto& column : maybeRightTableKeyPrefix.Cast().Args()) { + columns.push_back(TExprBase(column)); + } + + rightPrefixExpr = Build<TCoAsList>(ctx, join.Pos()) + .Add<TExprList>() + .Add(columns) + .Build() + .Done(); if (auto indexRead = rightRead.template Maybe<TKqlReadTableIndex>()) { indexName = indexRead.Cast().Index().StringValue(); @@ -403,6 +416,11 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } } else if constexpr (std::is_same_v<ReadType, TKqlReadTableRangesBase>){ auto read = rightReadMatch->Read.template Cast<TKqlReadTableRangesBase>(); + if (!read.Table().SysView().Value().empty()) { + // Can't lookup in system views + return {}; + } + lookupColumns = read.Columns(); if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) { @@ -414,27 +432,32 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext lookupTable = read.Table().Path().StringValue(); } - const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); - if (TCoVoid::Match(read.Ranges().Raw())) { - rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()).Done(); + rightPrefixSize = 0; + rightPrefixExpr = Build<TCoJust>(ctx, join.Pos()) + .Input<TCoAsList>().Build() + .Done(); } else { auto prompt = TKqpReadTableExplainPrompt::Parse(read); + + if (prompt.PointPrefixLen != prompt.UsedKeyColumns.size()) { + return {}; + } + if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) { return {}; } + rightPrefixSize = prompt.PointPrefixLen; - TMaybeNode<TExprBase> row; + const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); + + TMaybeNode<TExprBase> rowsExpr; if (read.template Maybe<TKqlReadTableRanges>()) { - row = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr(); - } - if (rightRead.template Maybe<TKqlReadTableIndexRanges>()) { - row = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr(); + rowsExpr = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr(); } - if (!row.IsValid()) { - return {}; + if (read.template Maybe<TKqlReadTableIndexRanges>()) { + rowsExpr = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr(); } - row = Build<TCoHead>(ctx, read.Ranges().Pos()).Input(row.Cast()).Done(); size_t prefixLen = prompt.PointPrefixLen; TVector<TString> keyColumns; @@ -443,23 +466,34 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext keyColumns.push_back(rightTableDesc.Metadata->KeyColumnNames[i]); } + + auto rowArg = Build<TCoArgument>(ctx, join.Pos()) + .Name("rowArg") + .Done(); + TVector<TExprBase> components; for (auto column : keyColumns) { TCoAtom columnAtom(ctx.NewAtom(read.Ranges().Pos(), column)); components.push_back( Build<TCoMember>(ctx, read.Ranges().Pos()) - .Struct(row.Cast()) + .Struct(rowArg) .Name(columnAtom) .Done()); } - rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()) - .Add(components) + rightPrefixExpr = Build<TCoMap>(ctx, join.Pos()) + .Input(rowsExpr.Cast()) + .Lambda() + .Args({rowArg}) + .Body<TExprList>() + .Add(components) + .Build() + .Build() .Done(); } } - Y_ENSURE(rightTableKeyPrefix); + Y_ENSURE(rightPrefixExpr.IsValid()); const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); if (rightTableDesc.Metadata->Kind == NYql::EKikimrTableKind::Olap) { @@ -493,6 +527,10 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext .Name("leftRowArg") .Done(); + auto prefixRowArg = Build<TCoArgument>(ctx, join.Pos()) + .Name("prefixArg") + .Done(); + TVector<TExprBase> lookupMembers; TVector<TCoAtom> skipNullColumns; ui32 fixedPrefix = 0; @@ -502,12 +540,15 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext auto leftColumn = rightJoinKeyToLeft.FindPtr(rightColumnName); - if (fixedPrefix < rightTableKeyPrefix->ArgCount()) { + if (fixedPrefix < rightPrefixSize) { if (leftColumn) { return {}; } - member = rightTableKeyPrefix->Arg(fixedPrefix).Ptr(); + member = Build<TCoNth>(ctx, prefixRowArg.Pos()) + .Tuple(prefixRowArg) + .Index().Value(ToString(fixedPrefix)).Build() + .Done().Ptr(); fixedPrefix++; } else { if (!leftColumn) { @@ -629,15 +670,21 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } if (useStreamIndexLookupJoin) { - auto leftInput = Build<TCoMap>(ctx, join.Pos()) + auto leftInput = Build<TCoFlatMap>(ctx, join.Pos()) .Input(leftData) .Lambda() .Args({leftRowArg}) - .Body<TExprList>() - .Add<TCoAsStruct>() - .Add(lookupMembers) - .Build() - .Add(leftRowArg) + .Body<TCoMap>() + .Input(rightPrefixExpr.Cast()) + .Lambda() + .Args({prefixRowArg}) + .Body<TExprList>() + .Add<TCoAsStruct>() + .Add(lookupMembers) + .Build() + .Add(leftRowArg) + .Build() + .Build() .Build() .Build() .Done(); @@ -646,14 +693,20 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } auto leftDataDeduplicated = DeduplicateByMembers(leftData, filter, deduplicateLeftColumns, ctx, join.Pos()); - auto keysToLookup = Build<TCoMap>(ctx, join.Pos()) + auto keysToLookup = Build<TCoFlatMap>(ctx, join.Pos()) .Input(leftDataDeduplicated) .Lambda() .Args({leftRowArg}) - .Body<TCoAsStruct>() - .Add(lookupMembers) + .Body<TCoMap>() + .Input(rightPrefixExpr.Cast()) + .Lambda() + .Args({prefixRowArg}) + .Body<TCoAsStruct>() + .Add(lookupMembers) + .Build() .Build() .Build() + .Build() .Done(); TExprBase lookup = indexName diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index 71b2ab727ac..2dc178460d4 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -346,6 +346,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx if (buildResult.ExpectedMaxRanges.Defined()) { prompt.SetExpectedMaxRanges(buildResult.ExpectedMaxRanges.GetRef()); } + prompt.SetPointPrefixLen(buildResult.UsedPrefixLen); YQL_CLOG(DEBUG, ProviderKqp) << "Ranges extracted: " << KqpExprToPrettyString(*ranges, ctx); YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp index 5728b843ca8..00d430ae7b4 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp @@ -72,90 +72,103 @@ TExprBase KqpRewriteSqlInToEquiJoin(const TExprBase& node, TExprContext& ctx, co return node; } + TVector<TStringBuf> keys; // remaining key parts, that can be used in SqlIn (only in asc order) + + const NYql::TKikimrTableDescription* tableDesc; + auto readMatch = MatchRead<TKqlReadTableBase>(flatMap.Input()); + auto rangesMatch = MatchRead<TKqlReadTableRangesBase>(flatMap.Input()); + ui64 fixedPrefixLen; + if (readMatch) { + TString lookupTable; - TString lookupTable; - //TODO: remove this branch KIKIMR-15255 - if (!readMatch) { - if (auto readRangesMatch = MatchRead<TKqlReadTableRangesBase>(flatMap.Input())) { - auto read = readRangesMatch->Read.Cast<TKqlReadTableRangesBase>(); - if (TCoVoid::Match(read.Ranges().Raw())) { - readMatch = readRangesMatch; - auto key = Build<TKqlKeyInc>(ctx, read.Pos()).Done(); - readMatch->Read = - Build<TKqlReadTable>(ctx, read.Pos()) - .Settings(read.Settings()) - .Table(read.Table()) - .Columns(read.Columns()) - .Range<TKqlKeyRange>() - .From(key) - .To(key) - .Build() - .Done(); - if (auto indexRead = read.Maybe<TKqlReadTableIndexRanges>()) { - const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue()); - lookupTable = indexMeta->Name; - } - } else { - return node; - } - } else { + if (readMatch->FlatMap) { return node; } - } - if (!readMatch) { - return node; - } - if (readMatch->FlatMap) { - return node; - } + auto readTable = readMatch->Read.Cast<TKqlReadTableBase>(); - auto readTable = readMatch->Read.Cast<TKqlReadTableBase>(); + static const std::set<TStringBuf> supportedReads { + TKqlReadTable::CallableName(), + TKqlReadTableIndex::CallableName(), + }; - static const std::set<TStringBuf> supportedReads { - TKqlReadTable::CallableName(), - TKqlReadTableIndex::CallableName(), - }; + if (!supportedReads.contains(readTable.CallableName())) { + return node; + } - if (!supportedReads.contains(readTable.CallableName())) { - return node; - } + if (!readTable.Table().SysView().Value().empty()) { + return node; + } - if (!readTable.Table().SysView().Value().empty()) { - return node; - } + if (auto indexRead = readTable.Maybe<TKqlReadTableIndex>()) { + lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name; + } else if (!lookupTable) { + lookupTable = readTable.Table().Path().StringValue(); + } - if (auto indexRead = readTable.Maybe<TKqlReadTableIndex>()) { - lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name; - } else if (!lookupTable) { - lookupTable = readTable.Table().Path().StringValue(); - } + tableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); + const auto& rangeFrom = readTable.Range().From(); + const auto& rangeTo = readTable.Range().To(); - const auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); - const auto& rangeFrom = readTable.Range().From(); - const auto& rangeTo = readTable.Range().To(); + if (!rangeFrom.Maybe<TKqlKeyInc>() || !rangeTo.Maybe<TKqlKeyInc>()) { + return node; + } + if (rangeFrom.Raw() != rangeTo.Raw()) { + // not point selection + return node; + } - if (!rangeFrom.Maybe<TKqlKeyInc>() || !rangeTo.Maybe<TKqlKeyInc>()) { - return node; - } - if (rangeFrom.Raw() != rangeTo.Raw()) { - // not point selection + fixedPrefixLen = rangeFrom.ArgCount(); + } else if (rangesMatch) { + if (rangesMatch->FlatMap) { + return node; + } + + auto read = rangesMatch->Read.template Cast<TKqlReadTableRangesBase>(); + + if (!read.Table().SysView().Value().empty()) { + return node; + } + + auto prompt = TKqpReadTableExplainPrompt::Parse(read); + if (prompt.PointPrefixLen != prompt.UsedKeyColumns.size()) { + return node; + } + + if (!TCoVoid::Match(read.Ranges().Raw()) && prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) { + return node; + } + + TString lookupTable; + TString indexName; + if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) { + const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path()); + const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue()); + lookupTable = indexMeta->Name; + indexName = indexRead.Cast().Index().StringValue(); + } else { + lookupTable = read.Table().Path().StringValue(); + } + + tableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); + + fixedPrefixLen = prompt.PointPrefixLen; + } else { return node; } - i64 keySuffixLen = (i64) tableDesc.Metadata->KeyColumnNames.size() - (i64) rangeFrom.ArgCount(); + i64 keySuffixLen = (i64) tableDesc->Metadata->KeyColumnNames.size() - (i64) fixedPrefixLen; if (keySuffixLen <= 0) { return node; } - TVector<TStringBuf> keys; // remaining key parts, that can be used in SqlIn (only in asc order) keys.reserve(keySuffixLen); - for (ui64 idx = rangeFrom.ArgCount(); idx < tableDesc.Metadata->KeyColumnNames.size(); ++idx) { - keys.emplace_back(TStringBuf(tableDesc.Metadata->KeyColumnNames[idx])); + for (ui64 idx = fixedPrefixLen; idx < tableDesc->Metadata->KeyColumnNames.size(); ++idx) { + keys.emplace_back(TStringBuf(tableDesc->Metadata->KeyColumnNames[idx])); } + auto flatMapLambdaArg = flatMap.Lambda().Args().Arg(0); auto findMemberIndexInKeys = [&keys](const TCoArgument& flatMapLambdaArg, const TCoMember& member) { diff --git a/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp b/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp index f32e2edfd27..3d11a090a79 100644 --- a/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp @@ -16,10 +16,42 @@ void PrepareTablesToUnpack(TSession session) { Value String, PRIMARY KEY (Key, Fk) ); + CREATE TABLE `/Root/UintComplexKey` ( + Key UInt64, + Fk Int64, + Value String, + PRIMARY KEY (Key, Fk) + ); + CREATE TABLE `/Root/UintComplexKeyWithIndex` ( + Key UInt64, + Fk Int64, + Value String, + Payload String, + PRIMARY KEY (Value, Fk), + INDEX Index GLOBAL ON (Key, Fk) + ); + CREATE TABLE `/Root/SimpleKey` ( Key Int32, Value String, - PRIMARY KEY (Key)); + PRIMARY KEY (Key) + ); + CREATE TABLE `/Root/Uint64Table` ( + Key Uint64, + Value Uint64, + PRIMARY KEY (Key) + ); + CREATE TABLE `/Root/Uint32Table` ( + Key Uint32, + Value Uint32, + PRIMARY KEY (Key) + ); + + CREATE TABLE `/Root/UTF8Table` ( + Key UTF8, + Value UTF8, + PRIMARY KEY (Key) + ); )").GetValueSync(); UNIT_ASSERT_C(result1.IsSuccess(), result1.GetIssues().ToString()); @@ -33,11 +65,40 @@ void PrepareTablesToUnpack(TSession session) { (4, 104, "Value2"), (5, 105, "Value3"); + REPLACE INTO `/Root/UintComplexKey` (Key, Fk, Value) VALUES + (null, null, "NullValue"), + (1, 101, "Value1"), + (-2, 102, "Value1"), + (-2, 103, "Value3"), + (3, 103, "Value2"), + (4, 104, "Value2"), + (5, 105, "Value3"); + + REPLACE INTO `/Root/UintComplexKeyWithIndex` (Key, Fk, Value, Payload) VALUES + (null, null, "NullValue", "null"), + (1, 101, "Value1", "101"), + (-2, 102, "Value1", "102"), + (-2, 103, "Value3", "103-1"), + (3, 103, "Value2", "103-2"), + (4, 104, "Value2", "104"), + (5, 105, "Value3", "105"); + REPLACE INTO `/Root/SimpleKey` (Key, Value) VALUES (100, "Value20"), (101, "Value21"), (102, "Value22"), (103, "Value23"); + + REPLACE INTO `/Root/Uint64Table` (Key, Value) VALUES + (Cast(-1 AS Uint64), 1), + (-1, 2), + (5, -1), + (3, 3); + + REPLACE INTO `/Root/UTF8Table` (Key, Value) VALUES + ("1", "2"), + ("5", "-1"), + ("3", "3"); )", TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT_C(result2.IsSuccess(), result2.GetIssues().ToString()); @@ -45,7 +106,7 @@ void PrepareTablesToUnpack(TSession session) { Y_UNIT_TEST_SUITE(KqpExtractPredicateLookup) { -void Test(const TString& query, const TString& answer, bool noScans = true, bool checkForLookups = true) { +void Test(const TString& query, const TString& answer, TMaybe<TString> allowScans = {}, NYdb::TParams params = TParamsBuilder().Build()) { TKikimrSettings settings; settings.SetDomainRoot(KikimrDefaultUtDomainRoot); TKikimrRunner kikimr(settings); @@ -57,25 +118,33 @@ void Test(const TString& query, const TString& answer, bool noScans = true, bool TExecDataQuerySettings execSettings; execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); - auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync(); + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), params, execSettings).ExtractValueSync(); UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); UNIT_ASSERT_EQUAL(result.GetResultSets().size(), 1); CompareYson(answer, FormatResultSetYson(result.GetResultSet(0))); auto explain = session.ExplainDataQuery(query).ExtractValueSync(); - if (checkForLookups) { - UNIT_ASSERT(explain.GetPlan().Contains("Lookup")); - } - if (noScans) { - UNIT_ASSERT(!explain.GetPlan().Contains("Scan")); + UNIT_ASSERT(explain.GetPlan().Contains("Lookup")); + Cerr << explain.GetPlan(); + + NJson::TJsonValue plan; + NJson::ReadJsonTree(explain.GetPlan(), &plan, true); + UNIT_ASSERT(ValidatePlanNodeIds(plan)); + for (const auto& tableStats : plan.GetMap().at("tables").GetArray()) { + TString table = tableStats.GetMap().at("name").GetString(); + if (allowScans && table == *allowScans) { + continue; + } + + for (auto& read : tableStats.GetMap().at("reads").GetArray()) { + UNIT_ASSERT(!read.GetMap().at("type").GetString().Contains("Scan")); + } } } -void TestRange(const TString& query, const TString& answer, int stagesCount = 1) { - TKikimrSettings settings; - settings.SetDomainRoot(KikimrDefaultUtDomainRoot); - TKikimrRunner kikimr(settings); +void TestRange(const TString& query, const TString& answer, ui64 rowsRead, int stagesCount = 1) { + TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -92,6 +161,45 @@ void TestRange(const TString& query, const TString& answer, int stagesCount = 1) auto stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); UNIT_ASSERT_EQUAL(stagesCount, stats.query_phases_size()); + + ui64 rowsStats = 0; + for (auto& phase : stats.query_phases()) { + for (auto& access : phase.table_access()) { + rowsStats += access.reads().rows(); + } + } + UNIT_ASSERT_EQUAL(rowsStats, rowsRead); +} + +Y_UNIT_TEST(OverflowLookup) { + TestRange( + R"( + SELECT * FROM `/Root/Uint64Table` + WHERE Key = 3; + )", + R"([ + [[3u];[3u]] + ])", + 1); + + TestRange( + R"( + SELECT * FROM `/Root/Uint64Table` + WHERE Key = -1; + )", + R"([])", + 0, + 2); + + TestRange( + R"( + SELECT Value FROM `/Root/Uint64Table` + WHERE Key IS NULL; + )", + R"([ + [[1u]] + ])", + 1); } Y_UNIT_TEST(SimpleRange) { @@ -101,8 +209,11 @@ Y_UNIT_TEST(SimpleRange) { WHERE Key >= 101 AND Key < 104; )", R"([ - [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]] - ])"); + [[101];["Value21"]]; + [[102];["Value22"]]; + [[103];["Value23"]] + ])", + 3); TestRange( R"( @@ -110,8 +221,11 @@ Y_UNIT_TEST(SimpleRange) { WHERE Key >= 101; )", R"([ - [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]] - ])"); + [[101];["Value21"]]; + [[102];["Value22"]]; + [[103];["Value23"]] + ])", + 3); TestRange( R"( @@ -119,8 +233,12 @@ Y_UNIT_TEST(SimpleRange) { WHERE Key < 104; )", R"([ - [[100];["Value20"]];[[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]] - ])"); + [[100];["Value20"]]; + [[101];["Value21"]]; + [[102];["Value22"]]; + [[103];["Value23"]] + ])", + 4); TestRange( R"( @@ -128,8 +246,12 @@ Y_UNIT_TEST(SimpleRange) { WHERE Key < 104 AND Key >= 0; )", R"([ - [[100];["Value20"]];[[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]] - ])"); + [[100];["Value20"]]; + [[101];["Value21"]]; + [[102];["Value22"]]; + [[103];["Value23"]] + ])", + 4); TestRange( R"( @@ -137,8 +259,21 @@ Y_UNIT_TEST(SimpleRange) { WHERE Key >= 101 AND Key < 104u; )", R"([ - [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]] - ])"); + [[101];["Value21"]]; + [[102];["Value22"]]; + [[103];["Value23"]] + ])", + 3); + + TestRange( + R"( + SELECT * FROM `/Root/UTF8Table` + WHERE Key = "1"; + )", + R"([ + [["1"];["2"]] + ])", + 1); } Y_UNIT_TEST(ComplexRange) { @@ -148,8 +283,12 @@ Y_UNIT_TEST(ComplexRange) { WHERE Key >= 1 AND Key < 4 AND Fk >= 101 AND Fk < 104; )", R"([ - [[1];[101];["Value1"]];[[2];[102];["Value1"]];[[2];[103];["Value3"]];[[3];[103];["Value2"]] - ])"); + [[1];[101];["Value1"]]; + [[2];[102];["Value1"]]; + [[2];[103];["Value3"]]; + [[3];[103];["Value2"]] + ])", + 4); TestRange( R"( @@ -157,8 +296,12 @@ Y_UNIT_TEST(ComplexRange) { WHERE (Key, Fk) >= (1, 101) AND (Key, Fk) < (4, 104); )", R"([ - [[1];[101];["Value1"]];[[2];[102];["Value1"]];[[2];[103];["Value3"]];[[3];[103];["Value2"]] - ])"); + [[1];[101];["Value1"]]; + [[2];[102];["Value1"]]; + [[2];[103];["Value3"]]; + [[3];[103];["Value2"]] + ])", + 4); TestRange( R"( @@ -168,6 +311,7 @@ Y_UNIT_TEST(ComplexRange) { R"([ [[5];[105];["Value3"]] ])", + 1, 2); } @@ -251,17 +395,119 @@ Y_UNIT_TEST(ComplexLookupComplexKey) { Y_UNIT_TEST(PointJoin) { Test( R"( + DECLARE $p as Int32; SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r INNER JOIN `/Root/ComplexKey` AS l ON l.Fk = r.Key - WHERE l.Key = 1 + 1 and l.Key = l.Key + WHERE l.Key = 1 + $p and l.Key = l.Key ORDER BY r.Value )", R"([ [[2];[102];["Value1"];[102];["Value22"]]; [[2];[103];["Value3"];[103];["Value23"]] ])", - /* noScans */ false); + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(1).Build().Build()); + + Test( + R"( + DECLARE $p as Int32; + SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r + INNER JOIN `/Root/UintComplexKey` AS l + ON l.Fk = r.Key + WHERE l.Key = $p and l.Key = l.Key + ORDER BY r.Value + )", + R"([ + [[3u];[103];["Value2"];[103];["Value23"]] + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(3).Build().Build()); + + Test( + R"( + DECLARE $p as Int32; + SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r + INNER JOIN `/Root/UintComplexKey` AS l + ON l.Fk = r.Key + WHERE l.Key = $p and l.Key = l.Key + ORDER BY r.Value + )", + R"([ + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(-2).Build().Build()); + + Test( + R"( + DECLARE $p as Int32; + SELECT l.Key, l.Fk, l.Value, r.Key, r.Value, l.Payload FROM `/Root/SimpleKey` AS r + INNER JOIN `/Root/UintComplexKeyWithIndex` VIEW Index AS l + ON l.Fk = r.Key + WHERE l.Key = $p and l.Key = l.Key + ORDER BY r.Value + )", + R"([ + [[3u];[103];["Value2"];[103];["Value23"];["103-2"]] + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(3).Build().Build()); +} + +Y_UNIT_TEST(SqlInJoin) { + Test( + R"( + DECLARE $p AS Int32; + $rows = (SELECT Key FROM `/Root/SimpleKey`); + SELECT Key, Fk, Value FROM `/Root/ComplexKey` + WHERE Fk IN $rows AND Key = 1 + $p + ORDER BY Key, Fk + )", + R"([ + [[2];[102];["Value1"]]; + [[2];[103];["Value3"]] + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(1).Build().Build()); + + Test( + R"( + DECLARE $p AS Int32; + $rows = (SELECT Key FROM `/Root/SimpleKey`); + SELECT Key, Fk, Value FROM `/Root/UintComplexKey` + WHERE Fk IN $rows AND Key = $p + )", + R"([ + [[3u];[103];["Value2"]] + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(3).Build().Build()); + + Test( + R"( + DECLARE $p AS Int32; + $rows = (SELECT Key FROM `/Root/SimpleKey`); + SELECT Key, Fk, Value FROM `/Root/UintComplexKey` + WHERE Fk IN $rows AND Key = $p + )", + R"([ + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(-2).Build().Build()); + + + Test( + R"( + DECLARE $p AS Int32; + $rows = (SELECT Key FROM `/Root/SimpleKey`); + SELECT Key, Fk, Value, Payload FROM `/Root/UintComplexKeyWithIndex` VIEW Index + WHERE Fk IN $rows AND Key = $p + )", + R"([ + [[3u];[103];["Value2"];["103-2"]] + ])", + "/Root/SimpleKey", + TParamsBuilder().AddParam("$p").Int32(3).Build().Build()); } } // suite diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp index f38379da18e..3e660f2b3eb 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp @@ -8,6 +8,8 @@ #include <ydb/library/yql/core/services/yql_transform_pipeline.h> #include <ydb/library/yql/core/services/yql_out_transformers.h> +#include <ydb/library/yql/utils/utf8.h> + namespace NYql { namespace NDetail { namespace { @@ -1831,6 +1833,120 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co return ctx.NewCallable(pos, range->IsCallable("RangeOr") ? "RangeUnion" : "RangeIntersect", std::move(output)); } +IGraphTransformer::TStatus ConvertLiteral(TExprNode::TPtr& node, const NYql::TTypeAnnotationNode & sourceType, const NYql::TTypeAnnotationNode & expectedType, NYql::TExprContext& ctx) { + if (IsSameAnnotation(sourceType, expectedType)) { + return IGraphTransformer::TStatus::Ok; + } + + if (expectedType.GetKind() == ETypeAnnotationKind::Optional) { + auto nextType = expectedType.Cast<TOptionalExprType>()->GetItemType(); + auto originalNode = node; + auto status1 = ConvertLiteral(node, sourceType, *nextType, ctx); + if (status1.Level != IGraphTransformer::TStatus::Error) { + node = ctx.NewCallable(node->Pos(), "Just", { node }); + return IGraphTransformer::TStatus::Repeat; + } + + node = originalNode; + if (node->IsCallable("Just")) { + auto sourceItemType = sourceType.Cast<TOptionalExprType>()->GetItemType(); + auto value = node->HeadRef(); + auto status = ConvertLiteral(value, *sourceItemType, *nextType, ctx); + if (status.Level != IGraphTransformer::TStatus::Error) { + node = ctx.NewCallable(node->Pos(), "Just", { value }); + return IGraphTransformer::TStatus::Repeat; + } + } else if (sourceType.GetKind() == ETypeAnnotationKind::Optional) { + return IGraphTransformer::TStatus::Error; + } + + if (IsNull(sourceType)) { + node = ctx.NewCallable(node->Pos(), "Nothing", { ExpandType(node->Pos(), expectedType, ctx) }); + return IGraphTransformer::TStatus::Repeat; + } + } + + if (expectedType.GetKind() == ETypeAnnotationKind::Data && sourceType.GetKind() == ETypeAnnotationKind::Data) { + const auto from = sourceType.Cast<TDataExprType>()->GetSlot(); + const auto to = expectedType.Cast<TDataExprType>()->GetSlot(); + if (from == EDataSlot::Utf8 && to == EDataSlot::String) { + auto pos = node->Pos(); + node = ctx.NewCallable(pos, "ToString", { std::move(node) }); + return IGraphTransformer::TStatus::Repeat; + } + + if (node->IsCallable("String") && to == EDataSlot::Utf8) { + if (const auto atom = node->Head().Content(); IsUtf8(atom)) { + node = ctx.RenameNode(*node, "Utf8"); + return IGraphTransformer::TStatus::Repeat; + } + } + + if (IsDataTypeNumeric(from) && IsDataTypeNumeric(to)) { + { + auto current = node; + bool negate = false; + for (;;) { + if (current->IsCallable("Plus")) { + current = current->HeadPtr(); + } + else if (current->IsCallable("Minus")) { + current = current->HeadPtr(); + negate = !negate; + } + else { + break; + } + } + + if (const auto maybeInt = TMaybeNode<TCoIntegralCtor>(current)) { + TString atomValue; + if (AllowIntegralConversion(maybeInt.Cast(), false, to, &atomValue)) { + node = ctx.NewCallable(node->Pos(), expectedType.Cast<TDataExprType>()->GetName(), + {ctx.NewAtom(node->Pos(), atomValue, TNodeFlags::Default)}); + return IGraphTransformer::TStatus::Repeat; + } + } + } + + if (GetNumericDataTypeLevel(to) < GetNumericDataTypeLevel(from)) { + return IGraphTransformer::TStatus::Error; + } + + auto castResult = NKikimr::NUdf::GetCastResult(from, to); + if (!castResult || *castResult & NKikimr::NUdf::Impossible) { + return IGraphTransformer::TStatus::Error; + } + + if (*castResult != NKikimr::NUdf::ECastOptions::Complete) { + return IGraphTransformer::TStatus::Error; + } + + const auto pos = node->Pos(); + auto type = ExpandType(pos, expectedType, ctx); + node = ctx.NewCallable(pos, "Convert", {std::move(node), std::move(type)}); + return IGraphTransformer::TStatus::Repeat; + } + + auto fromFeatures = NUdf::GetDataTypeInfo(from).Features; + auto toFeatures = NUdf::GetDataTypeInfo(to).Features; + if ((fromFeatures & NUdf::TzDateType) && (toFeatures & (NUdf::DateType| NUdf::TzDateType)) || + (toFeatures & NUdf::TzDateType) && (fromFeatures & (NUdf::DateType | NUdf::TzDateType))) { + node = ctx.Builder(node->Pos()) + .Callable("Apply") + .Callable(0, "Udf") + .Atom(0, TString("DateTime2.Make") + expectedType.Cast<TDataExprType>()->GetName()) + .Seal() + .Add(1, node->HeadPtr()) + .Seal() + .Build(); + return IGraphTransformer::TStatus::Repeat; + } + } + + return IGraphTransformer::TStatus::Error; +} + void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexKeys, const TStructExprType& rowType, TExprContext& ctx, TTypeAnnotationContext& types) { if (!hint) { return; @@ -1854,10 +1970,10 @@ void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexK [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus { output = input; - auto status = TrySilentConvertTo(output, *unwrapOptional, ctx); + auto status = ConvertLiteral(output, *output->GetTypeAnn(), *unwrapOptional, ctx); if (status == IGraphTransformer::TStatus::Error) { output = input; - status = TrySilentConvertTo(output, *columnType, ctx); + status = ConvertLiteral(output, *output->GetTypeAnn(), *columnType, ctx); } if (status == IGraphTransformer::TStatus::Repeat) { |
