summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorssmike <[email protected]>2023-11-17 20:23:29 +0300
committerssmike <[email protected]>2023-11-17 21:05:54 +0300
commit03f188111211d64677ea001d0f1ded8e392b9d27 (patch)
tree3039c1ae954eefc4d5a99fcdefdf5005b6dda9ff
parent86cd90095ae579c072aabe74272211905395b863 (diff)
Fix unsafe conversions
YQL-16933
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp109
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp1
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp137
-rw-r--r--ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp302
-rw-r--r--ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp120
5 files changed, 549 insertions, 120 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
index 59141ae6667..85939d28575 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
@@ -378,7 +378,9 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
auto rightRead = rightReadMatch->Read.template Cast<ReadType>();
TMaybeNode<TCoAtomList> lookupColumns;
- TMaybe<TKqlKeyInc> rightTableKeyPrefix;
+ size_t rightPrefixSize;
+ TMaybeNode<TExprBase> rightPrefixExpr;
+
if constexpr (std::is_same_v<ReadType, TKqlReadTableBase>) {
Y_ENSURE(rightRead.template Maybe<TKqlReadTable>() || rightRead.template Maybe<TKqlReadTableIndex>());
const TKqlReadTableBase read = rightRead;
@@ -393,7 +395,18 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
}
lookupColumns = read.Columns();
- rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast();
+
+ rightPrefixSize = maybeRightTableKeyPrefix.Cast().ArgCount();
+ TVector<TExprBase> columns;
+ for (auto& column : maybeRightTableKeyPrefix.Cast().Args()) {
+ columns.push_back(TExprBase(column));
+ }
+
+ rightPrefixExpr = Build<TCoAsList>(ctx, join.Pos())
+ .Add<TExprList>()
+ .Add(columns)
+ .Build()
+ .Done();
if (auto indexRead = rightRead.template Maybe<TKqlReadTableIndex>()) {
indexName = indexRead.Cast().Index().StringValue();
@@ -403,6 +416,11 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
}
} else if constexpr (std::is_same_v<ReadType, TKqlReadTableRangesBase>){
auto read = rightReadMatch->Read.template Cast<TKqlReadTableRangesBase>();
+ if (!read.Table().SysView().Value().empty()) {
+ // Can't lookup in system views
+ return {};
+ }
+
lookupColumns = read.Columns();
if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) {
@@ -414,27 +432,32 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
lookupTable = read.Table().Path().StringValue();
}
- const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
-
if (TCoVoid::Match(read.Ranges().Raw())) {
- rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()).Done();
+ rightPrefixSize = 0;
+ rightPrefixExpr = Build<TCoJust>(ctx, join.Pos())
+ .Input<TCoAsList>().Build()
+ .Done();
} else {
auto prompt = TKqpReadTableExplainPrompt::Parse(read);
+
+ if (prompt.PointPrefixLen != prompt.UsedKeyColumns.size()) {
+ return {};
+ }
+
if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) {
return {};
}
+ rightPrefixSize = prompt.PointPrefixLen;
- TMaybeNode<TExprBase> row;
+ const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
+
+ TMaybeNode<TExprBase> rowsExpr;
if (read.template Maybe<TKqlReadTableRanges>()) {
- row = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr();
- }
- if (rightRead.template Maybe<TKqlReadTableIndexRanges>()) {
- row = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr();
+ rowsExpr = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr();
}
- if (!row.IsValid()) {
- return {};
+ if (read.template Maybe<TKqlReadTableIndexRanges>()) {
+ rowsExpr = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr();
}
- row = Build<TCoHead>(ctx, read.Ranges().Pos()).Input(row.Cast()).Done();
size_t prefixLen = prompt.PointPrefixLen;
TVector<TString> keyColumns;
@@ -443,23 +466,34 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
keyColumns.push_back(rightTableDesc.Metadata->KeyColumnNames[i]);
}
+
+ auto rowArg = Build<TCoArgument>(ctx, join.Pos())
+ .Name("rowArg")
+ .Done();
+
TVector<TExprBase> components;
for (auto column : keyColumns) {
TCoAtom columnAtom(ctx.NewAtom(read.Ranges().Pos(), column));
components.push_back(
Build<TCoMember>(ctx, read.Ranges().Pos())
- .Struct(row.Cast())
+ .Struct(rowArg)
.Name(columnAtom)
.Done());
}
- rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos())
- .Add(components)
+ rightPrefixExpr = Build<TCoMap>(ctx, join.Pos())
+ .Input(rowsExpr.Cast())
+ .Lambda()
+ .Args({rowArg})
+ .Body<TExprList>()
+ .Add(components)
+ .Build()
+ .Build()
.Done();
}
}
- Y_ENSURE(rightTableKeyPrefix);
+ Y_ENSURE(rightPrefixExpr.IsValid());
const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
if (rightTableDesc.Metadata->Kind == NYql::EKikimrTableKind::Olap) {
@@ -493,6 +527,10 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
.Name("leftRowArg")
.Done();
+ auto prefixRowArg = Build<TCoArgument>(ctx, join.Pos())
+ .Name("prefixArg")
+ .Done();
+
TVector<TExprBase> lookupMembers;
TVector<TCoAtom> skipNullColumns;
ui32 fixedPrefix = 0;
@@ -502,12 +540,15 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
auto leftColumn = rightJoinKeyToLeft.FindPtr(rightColumnName);
- if (fixedPrefix < rightTableKeyPrefix->ArgCount()) {
+ if (fixedPrefix < rightPrefixSize) {
if (leftColumn) {
return {};
}
- member = rightTableKeyPrefix->Arg(fixedPrefix).Ptr();
+ member = Build<TCoNth>(ctx, prefixRowArg.Pos())
+ .Tuple(prefixRowArg)
+ .Index().Value(ToString(fixedPrefix)).Build()
+ .Done().Ptr();
fixedPrefix++;
} else {
if (!leftColumn) {
@@ -629,15 +670,21 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
}
if (useStreamIndexLookupJoin) {
- auto leftInput = Build<TCoMap>(ctx, join.Pos())
+ auto leftInput = Build<TCoFlatMap>(ctx, join.Pos())
.Input(leftData)
.Lambda()
.Args({leftRowArg})
- .Body<TExprList>()
- .Add<TCoAsStruct>()
- .Add(lookupMembers)
- .Build()
- .Add(leftRowArg)
+ .Body<TCoMap>()
+ .Input(rightPrefixExpr.Cast())
+ .Lambda()
+ .Args({prefixRowArg})
+ .Body<TExprList>()
+ .Add<TCoAsStruct>()
+ .Add(lookupMembers)
+ .Build()
+ .Add(leftRowArg)
+ .Build()
+ .Build()
.Build()
.Build()
.Done();
@@ -646,14 +693,20 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
}
auto leftDataDeduplicated = DeduplicateByMembers(leftData, filter, deduplicateLeftColumns, ctx, join.Pos());
- auto keysToLookup = Build<TCoMap>(ctx, join.Pos())
+ auto keysToLookup = Build<TCoFlatMap>(ctx, join.Pos())
.Input(leftDataDeduplicated)
.Lambda()
.Args({leftRowArg})
- .Body<TCoAsStruct>()
- .Add(lookupMembers)
+ .Body<TCoMap>()
+ .Input(rightPrefixExpr.Cast())
+ .Lambda()
+ .Args({prefixRowArg})
+ .Body<TCoAsStruct>()
+ .Add(lookupMembers)
+ .Build()
.Build()
.Build()
+ .Build()
.Done();
TExprBase lookup = indexName
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
index 71b2ab727ac..2dc178460d4 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
@@ -346,6 +346,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx
if (buildResult.ExpectedMaxRanges.Defined()) {
prompt.SetExpectedMaxRanges(buildResult.ExpectedMaxRanges.GetRef());
}
+ prompt.SetPointPrefixLen(buildResult.UsedPrefixLen);
YQL_CLOG(DEBUG, ProviderKqp) << "Ranges extracted: " << KqpExprToPrettyString(*ranges, ctx);
YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx);
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp
index 5728b843ca8..00d430ae7b4 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp
@@ -72,90 +72,103 @@ TExprBase KqpRewriteSqlInToEquiJoin(const TExprBase& node, TExprContext& ctx, co
return node;
}
+ TVector<TStringBuf> keys; // remaining key parts, that can be used in SqlIn (only in asc order)
+
+ const NYql::TKikimrTableDescription* tableDesc;
+
auto readMatch = MatchRead<TKqlReadTableBase>(flatMap.Input());
+ auto rangesMatch = MatchRead<TKqlReadTableRangesBase>(flatMap.Input());
+ ui64 fixedPrefixLen;
+ if (readMatch) {
+ TString lookupTable;
- TString lookupTable;
- //TODO: remove this branch KIKIMR-15255
- if (!readMatch) {
- if (auto readRangesMatch = MatchRead<TKqlReadTableRangesBase>(flatMap.Input())) {
- auto read = readRangesMatch->Read.Cast<TKqlReadTableRangesBase>();
- if (TCoVoid::Match(read.Ranges().Raw())) {
- readMatch = readRangesMatch;
- auto key = Build<TKqlKeyInc>(ctx, read.Pos()).Done();
- readMatch->Read =
- Build<TKqlReadTable>(ctx, read.Pos())
- .Settings(read.Settings())
- .Table(read.Table())
- .Columns(read.Columns())
- .Range<TKqlKeyRange>()
- .From(key)
- .To(key)
- .Build()
- .Done();
- if (auto indexRead = read.Maybe<TKqlReadTableIndexRanges>()) {
- const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path());
- const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue());
- lookupTable = indexMeta->Name;
- }
- } else {
- return node;
- }
- } else {
+ if (readMatch->FlatMap) {
return node;
}
- }
- if (!readMatch) {
- return node;
- }
- if (readMatch->FlatMap) {
- return node;
- }
+ auto readTable = readMatch->Read.Cast<TKqlReadTableBase>();
- auto readTable = readMatch->Read.Cast<TKqlReadTableBase>();
+ static const std::set<TStringBuf> supportedReads {
+ TKqlReadTable::CallableName(),
+ TKqlReadTableIndex::CallableName(),
+ };
- static const std::set<TStringBuf> supportedReads {
- TKqlReadTable::CallableName(),
- TKqlReadTableIndex::CallableName(),
- };
+ if (!supportedReads.contains(readTable.CallableName())) {
+ return node;
+ }
- if (!supportedReads.contains(readTable.CallableName())) {
- return node;
- }
+ if (!readTable.Table().SysView().Value().empty()) {
+ return node;
+ }
- if (!readTable.Table().SysView().Value().empty()) {
- return node;
- }
+ if (auto indexRead = readTable.Maybe<TKqlReadTableIndex>()) {
+ lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name;
+ } else if (!lookupTable) {
+ lookupTable = readTable.Table().Path().StringValue();
+ }
- if (auto indexRead = readTable.Maybe<TKqlReadTableIndex>()) {
- lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name;
- } else if (!lookupTable) {
- lookupTable = readTable.Table().Path().StringValue();
- }
+ tableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
+ const auto& rangeFrom = readTable.Range().From();
+ const auto& rangeTo = readTable.Range().To();
- const auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
- const auto& rangeFrom = readTable.Range().From();
- const auto& rangeTo = readTable.Range().To();
+ if (!rangeFrom.Maybe<TKqlKeyInc>() || !rangeTo.Maybe<TKqlKeyInc>()) {
+ return node;
+ }
+ if (rangeFrom.Raw() != rangeTo.Raw()) {
+ // not point selection
+ return node;
+ }
- if (!rangeFrom.Maybe<TKqlKeyInc>() || !rangeTo.Maybe<TKqlKeyInc>()) {
- return node;
- }
- if (rangeFrom.Raw() != rangeTo.Raw()) {
- // not point selection
+ fixedPrefixLen = rangeFrom.ArgCount();
+ } else if (rangesMatch) {
+ if (rangesMatch->FlatMap) {
+ return node;
+ }
+
+ auto read = rangesMatch->Read.template Cast<TKqlReadTableRangesBase>();
+
+ if (!read.Table().SysView().Value().empty()) {
+ return node;
+ }
+
+ auto prompt = TKqpReadTableExplainPrompt::Parse(read);
+ if (prompt.PointPrefixLen != prompt.UsedKeyColumns.size()) {
+ return node;
+ }
+
+ if (!TCoVoid::Match(read.Ranges().Raw()) && prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) {
+ return node;
+ }
+
+ TString lookupTable;
+ TString indexName;
+ if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) {
+ const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path());
+ const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue());
+ lookupTable = indexMeta->Name;
+ indexName = indexRead.Cast().Index().StringValue();
+ } else {
+ lookupTable = read.Table().Path().StringValue();
+ }
+
+ tableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
+
+ fixedPrefixLen = prompt.PointPrefixLen;
+ } else {
return node;
}
- i64 keySuffixLen = (i64) tableDesc.Metadata->KeyColumnNames.size() - (i64) rangeFrom.ArgCount();
+ i64 keySuffixLen = (i64) tableDesc->Metadata->KeyColumnNames.size() - (i64) fixedPrefixLen;
if (keySuffixLen <= 0) {
return node;
}
- TVector<TStringBuf> keys; // remaining key parts, that can be used in SqlIn (only in asc order)
keys.reserve(keySuffixLen);
- for (ui64 idx = rangeFrom.ArgCount(); idx < tableDesc.Metadata->KeyColumnNames.size(); ++idx) {
- keys.emplace_back(TStringBuf(tableDesc.Metadata->KeyColumnNames[idx]));
+ for (ui64 idx = fixedPrefixLen; idx < tableDesc->Metadata->KeyColumnNames.size(); ++idx) {
+ keys.emplace_back(TStringBuf(tableDesc->Metadata->KeyColumnNames[idx]));
}
+
auto flatMapLambdaArg = flatMap.Lambda().Args().Arg(0);
auto findMemberIndexInKeys = [&keys](const TCoArgument& flatMapLambdaArg, const TCoMember& member) {
diff --git a/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp b/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp
index f32e2edfd27..3d11a090a79 100644
--- a/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp
+++ b/ydb/core/kqp/ut/opt/kqp_extract_predicate_unpack_ut.cpp
@@ -16,10 +16,42 @@ void PrepareTablesToUnpack(TSession session) {
Value String,
PRIMARY KEY (Key, Fk)
);
+ CREATE TABLE `/Root/UintComplexKey` (
+ Key UInt64,
+ Fk Int64,
+ Value String,
+ PRIMARY KEY (Key, Fk)
+ );
+ CREATE TABLE `/Root/UintComplexKeyWithIndex` (
+ Key UInt64,
+ Fk Int64,
+ Value String,
+ Payload String,
+ PRIMARY KEY (Value, Fk),
+ INDEX Index GLOBAL ON (Key, Fk)
+ );
+
CREATE TABLE `/Root/SimpleKey` (
Key Int32,
Value String,
- PRIMARY KEY (Key));
+ PRIMARY KEY (Key)
+ );
+ CREATE TABLE `/Root/Uint64Table` (
+ Key Uint64,
+ Value Uint64,
+ PRIMARY KEY (Key)
+ );
+ CREATE TABLE `/Root/Uint32Table` (
+ Key Uint32,
+ Value Uint32,
+ PRIMARY KEY (Key)
+ );
+
+ CREATE TABLE `/Root/UTF8Table` (
+ Key UTF8,
+ Value UTF8,
+ PRIMARY KEY (Key)
+ );
)").GetValueSync();
UNIT_ASSERT_C(result1.IsSuccess(), result1.GetIssues().ToString());
@@ -33,11 +65,40 @@ void PrepareTablesToUnpack(TSession session) {
(4, 104, "Value2"),
(5, 105, "Value3");
+ REPLACE INTO `/Root/UintComplexKey` (Key, Fk, Value) VALUES
+ (null, null, "NullValue"),
+ (1, 101, "Value1"),
+ (-2, 102, "Value1"),
+ (-2, 103, "Value3"),
+ (3, 103, "Value2"),
+ (4, 104, "Value2"),
+ (5, 105, "Value3");
+
+ REPLACE INTO `/Root/UintComplexKeyWithIndex` (Key, Fk, Value, Payload) VALUES
+ (null, null, "NullValue", "null"),
+ (1, 101, "Value1", "101"),
+ (-2, 102, "Value1", "102"),
+ (-2, 103, "Value3", "103-1"),
+ (3, 103, "Value2", "103-2"),
+ (4, 104, "Value2", "104"),
+ (5, 105, "Value3", "105");
+
REPLACE INTO `/Root/SimpleKey` (Key, Value) VALUES
(100, "Value20"),
(101, "Value21"),
(102, "Value22"),
(103, "Value23");
+
+ REPLACE INTO `/Root/Uint64Table` (Key, Value) VALUES
+ (Cast(-1 AS Uint64), 1),
+ (-1, 2),
+ (5, -1),
+ (3, 3);
+
+ REPLACE INTO `/Root/UTF8Table` (Key, Value) VALUES
+ ("1", "2"),
+ ("5", "-1"),
+ ("3", "3");
)", TTxControl::BeginTx().CommitTx()).GetValueSync();
UNIT_ASSERT_C(result2.IsSuccess(), result2.GetIssues().ToString());
@@ -45,7 +106,7 @@ void PrepareTablesToUnpack(TSession session) {
Y_UNIT_TEST_SUITE(KqpExtractPredicateLookup) {
-void Test(const TString& query, const TString& answer, bool noScans = true, bool checkForLookups = true) {
+void Test(const TString& query, const TString& answer, TMaybe<TString> allowScans = {}, NYdb::TParams params = TParamsBuilder().Build()) {
TKikimrSettings settings;
settings.SetDomainRoot(KikimrDefaultUtDomainRoot);
TKikimrRunner kikimr(settings);
@@ -57,25 +118,33 @@ void Test(const TString& query, const TString& answer, bool noScans = true, bool
TExecDataQuerySettings execSettings;
execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic);
- auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync();
+ auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), params, execSettings).ExtractValueSync();
UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
UNIT_ASSERT_EQUAL(result.GetResultSets().size(), 1);
CompareYson(answer, FormatResultSetYson(result.GetResultSet(0)));
auto explain = session.ExplainDataQuery(query).ExtractValueSync();
- if (checkForLookups) {
- UNIT_ASSERT(explain.GetPlan().Contains("Lookup"));
- }
- if (noScans) {
- UNIT_ASSERT(!explain.GetPlan().Contains("Scan"));
+ UNIT_ASSERT(explain.GetPlan().Contains("Lookup"));
+ Cerr << explain.GetPlan();
+
+ NJson::TJsonValue plan;
+ NJson::ReadJsonTree(explain.GetPlan(), &plan, true);
+ UNIT_ASSERT(ValidatePlanNodeIds(plan));
+ for (const auto& tableStats : plan.GetMap().at("tables").GetArray()) {
+ TString table = tableStats.GetMap().at("name").GetString();
+ if (allowScans && table == *allowScans) {
+ continue;
+ }
+
+ for (auto& read : tableStats.GetMap().at("reads").GetArray()) {
+ UNIT_ASSERT(!read.GetMap().at("type").GetString().Contains("Scan"));
+ }
}
}
-void TestRange(const TString& query, const TString& answer, int stagesCount = 1) {
- TKikimrSettings settings;
- settings.SetDomainRoot(KikimrDefaultUtDomainRoot);
- TKikimrRunner kikimr(settings);
+void TestRange(const TString& query, const TString& answer, ui64 rowsRead, int stagesCount = 1) {
+ TKikimrRunner kikimr;
auto db = kikimr.GetTableClient();
auto session = db.CreateSession().GetValueSync().GetSession();
@@ -92,6 +161,45 @@ void TestRange(const TString& query, const TString& answer, int stagesCount = 1)
auto stats = NYdb::TProtoAccessor::GetProto(*result.GetStats());
UNIT_ASSERT_EQUAL(stagesCount, stats.query_phases_size());
+
+ ui64 rowsStats = 0;
+ for (auto& phase : stats.query_phases()) {
+ for (auto& access : phase.table_access()) {
+ rowsStats += access.reads().rows();
+ }
+ }
+ UNIT_ASSERT_EQUAL(rowsStats, rowsRead);
+}
+
+Y_UNIT_TEST(OverflowLookup) {
+ TestRange(
+ R"(
+ SELECT * FROM `/Root/Uint64Table`
+ WHERE Key = 3;
+ )",
+ R"([
+ [[3u];[3u]]
+ ])",
+ 1);
+
+ TestRange(
+ R"(
+ SELECT * FROM `/Root/Uint64Table`
+ WHERE Key = -1;
+ )",
+ R"([])",
+ 0,
+ 2);
+
+ TestRange(
+ R"(
+ SELECT Value FROM `/Root/Uint64Table`
+ WHERE Key IS NULL;
+ )",
+ R"([
+ [[1u]]
+ ])",
+ 1);
}
Y_UNIT_TEST(SimpleRange) {
@@ -101,8 +209,11 @@ Y_UNIT_TEST(SimpleRange) {
WHERE Key >= 101 AND Key < 104;
)",
R"([
- [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]]
- ])");
+ [[101];["Value21"]];
+ [[102];["Value22"]];
+ [[103];["Value23"]]
+ ])",
+ 3);
TestRange(
R"(
@@ -110,8 +221,11 @@ Y_UNIT_TEST(SimpleRange) {
WHERE Key >= 101;
)",
R"([
- [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]]
- ])");
+ [[101];["Value21"]];
+ [[102];["Value22"]];
+ [[103];["Value23"]]
+ ])",
+ 3);
TestRange(
R"(
@@ -119,8 +233,12 @@ Y_UNIT_TEST(SimpleRange) {
WHERE Key < 104;
)",
R"([
- [[100];["Value20"]];[[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]]
- ])");
+ [[100];["Value20"]];
+ [[101];["Value21"]];
+ [[102];["Value22"]];
+ [[103];["Value23"]]
+ ])",
+ 4);
TestRange(
R"(
@@ -128,8 +246,12 @@ Y_UNIT_TEST(SimpleRange) {
WHERE Key < 104 AND Key >= 0;
)",
R"([
- [[100];["Value20"]];[[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]]
- ])");
+ [[100];["Value20"]];
+ [[101];["Value21"]];
+ [[102];["Value22"]];
+ [[103];["Value23"]]
+ ])",
+ 4);
TestRange(
R"(
@@ -137,8 +259,21 @@ Y_UNIT_TEST(SimpleRange) {
WHERE Key >= 101 AND Key < 104u;
)",
R"([
- [[101];["Value21"]];[[102];["Value22"]];[[103];["Value23"]]
- ])");
+ [[101];["Value21"]];
+ [[102];["Value22"]];
+ [[103];["Value23"]]
+ ])",
+ 3);
+
+ TestRange(
+ R"(
+ SELECT * FROM `/Root/UTF8Table`
+ WHERE Key = "1";
+ )",
+ R"([
+ [["1"];["2"]]
+ ])",
+ 1);
}
Y_UNIT_TEST(ComplexRange) {
@@ -148,8 +283,12 @@ Y_UNIT_TEST(ComplexRange) {
WHERE Key >= 1 AND Key < 4 AND Fk >= 101 AND Fk < 104;
)",
R"([
- [[1];[101];["Value1"]];[[2];[102];["Value1"]];[[2];[103];["Value3"]];[[3];[103];["Value2"]]
- ])");
+ [[1];[101];["Value1"]];
+ [[2];[102];["Value1"]];
+ [[2];[103];["Value3"]];
+ [[3];[103];["Value2"]]
+ ])",
+ 4);
TestRange(
R"(
@@ -157,8 +296,12 @@ Y_UNIT_TEST(ComplexRange) {
WHERE (Key, Fk) >= (1, 101) AND (Key, Fk) < (4, 104);
)",
R"([
- [[1];[101];["Value1"]];[[2];[102];["Value1"]];[[2];[103];["Value3"]];[[3];[103];["Value2"]]
- ])");
+ [[1];[101];["Value1"]];
+ [[2];[102];["Value1"]];
+ [[2];[103];["Value3"]];
+ [[3];[103];["Value2"]]
+ ])",
+ 4);
TestRange(
R"(
@@ -168,6 +311,7 @@ Y_UNIT_TEST(ComplexRange) {
R"([
[[5];[105];["Value3"]]
])",
+ 1,
2);
}
@@ -251,17 +395,119 @@ Y_UNIT_TEST(ComplexLookupComplexKey) {
Y_UNIT_TEST(PointJoin) {
Test(
R"(
+ DECLARE $p as Int32;
SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r
INNER JOIN `/Root/ComplexKey` AS l
ON l.Fk = r.Key
- WHERE l.Key = 1 + 1 and l.Key = l.Key
+ WHERE l.Key = 1 + $p and l.Key = l.Key
ORDER BY r.Value
)",
R"([
[[2];[102];["Value1"];[102];["Value22"]];
[[2];[103];["Value3"];[103];["Value23"]]
])",
- /* noScans */ false);
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(1).Build().Build());
+
+ Test(
+ R"(
+ DECLARE $p as Int32;
+ SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r
+ INNER JOIN `/Root/UintComplexKey` AS l
+ ON l.Fk = r.Key
+ WHERE l.Key = $p and l.Key = l.Key
+ ORDER BY r.Value
+ )",
+ R"([
+ [[3u];[103];["Value2"];[103];["Value23"]]
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(3).Build().Build());
+
+ Test(
+ R"(
+ DECLARE $p as Int32;
+ SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM `/Root/SimpleKey` AS r
+ INNER JOIN `/Root/UintComplexKey` AS l
+ ON l.Fk = r.Key
+ WHERE l.Key = $p and l.Key = l.Key
+ ORDER BY r.Value
+ )",
+ R"([
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(-2).Build().Build());
+
+ Test(
+ R"(
+ DECLARE $p as Int32;
+ SELECT l.Key, l.Fk, l.Value, r.Key, r.Value, l.Payload FROM `/Root/SimpleKey` AS r
+ INNER JOIN `/Root/UintComplexKeyWithIndex` VIEW Index AS l
+ ON l.Fk = r.Key
+ WHERE l.Key = $p and l.Key = l.Key
+ ORDER BY r.Value
+ )",
+ R"([
+ [[3u];[103];["Value2"];[103];["Value23"];["103-2"]]
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(3).Build().Build());
+}
+
+Y_UNIT_TEST(SqlInJoin) {
+ Test(
+ R"(
+ DECLARE $p AS Int32;
+ $rows = (SELECT Key FROM `/Root/SimpleKey`);
+ SELECT Key, Fk, Value FROM `/Root/ComplexKey`
+ WHERE Fk IN $rows AND Key = 1 + $p
+ ORDER BY Key, Fk
+ )",
+ R"([
+ [[2];[102];["Value1"]];
+ [[2];[103];["Value3"]]
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(1).Build().Build());
+
+ Test(
+ R"(
+ DECLARE $p AS Int32;
+ $rows = (SELECT Key FROM `/Root/SimpleKey`);
+ SELECT Key, Fk, Value FROM `/Root/UintComplexKey`
+ WHERE Fk IN $rows AND Key = $p
+ )",
+ R"([
+ [[3u];[103];["Value2"]]
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(3).Build().Build());
+
+ Test(
+ R"(
+ DECLARE $p AS Int32;
+ $rows = (SELECT Key FROM `/Root/SimpleKey`);
+ SELECT Key, Fk, Value FROM `/Root/UintComplexKey`
+ WHERE Fk IN $rows AND Key = $p
+ )",
+ R"([
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(-2).Build().Build());
+
+
+ Test(
+ R"(
+ DECLARE $p AS Int32;
+ $rows = (SELECT Key FROM `/Root/SimpleKey`);
+ SELECT Key, Fk, Value, Payload FROM `/Root/UintComplexKeyWithIndex` VIEW Index
+ WHERE Fk IN $rows AND Key = $p
+ )",
+ R"([
+ [[3u];[103];["Value2"];["103-2"]]
+ ])",
+ "/Root/SimpleKey",
+ TParamsBuilder().AddParam("$p").Int32(3).Build().Build());
}
} // suite
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
index f38379da18e..3e660f2b3eb 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
@@ -8,6 +8,8 @@
#include <ydb/library/yql/core/services/yql_transform_pipeline.h>
#include <ydb/library/yql/core/services/yql_out_transformers.h>
+#include <ydb/library/yql/utils/utf8.h>
+
namespace NYql {
namespace NDetail {
namespace {
@@ -1831,6 +1833,120 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
return ctx.NewCallable(pos, range->IsCallable("RangeOr") ? "RangeUnion" : "RangeIntersect", std::move(output));
}
+IGraphTransformer::TStatus ConvertLiteral(TExprNode::TPtr& node, const NYql::TTypeAnnotationNode & sourceType, const NYql::TTypeAnnotationNode & expectedType, NYql::TExprContext& ctx) {
+ if (IsSameAnnotation(sourceType, expectedType)) {
+ return IGraphTransformer::TStatus::Ok;
+ }
+
+ if (expectedType.GetKind() == ETypeAnnotationKind::Optional) {
+ auto nextType = expectedType.Cast<TOptionalExprType>()->GetItemType();
+ auto originalNode = node;
+ auto status1 = ConvertLiteral(node, sourceType, *nextType, ctx);
+ if (status1.Level != IGraphTransformer::TStatus::Error) {
+ node = ctx.NewCallable(node->Pos(), "Just", { node });
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ node = originalNode;
+ if (node->IsCallable("Just")) {
+ auto sourceItemType = sourceType.Cast<TOptionalExprType>()->GetItemType();
+ auto value = node->HeadRef();
+ auto status = ConvertLiteral(value, *sourceItemType, *nextType, ctx);
+ if (status.Level != IGraphTransformer::TStatus::Error) {
+ node = ctx.NewCallable(node->Pos(), "Just", { value });
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ } else if (sourceType.GetKind() == ETypeAnnotationKind::Optional) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (IsNull(sourceType)) {
+ node = ctx.NewCallable(node->Pos(), "Nothing", { ExpandType(node->Pos(), expectedType, ctx) });
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
+
+ if (expectedType.GetKind() == ETypeAnnotationKind::Data && sourceType.GetKind() == ETypeAnnotationKind::Data) {
+ const auto from = sourceType.Cast<TDataExprType>()->GetSlot();
+ const auto to = expectedType.Cast<TDataExprType>()->GetSlot();
+ if (from == EDataSlot::Utf8 && to == EDataSlot::String) {
+ auto pos = node->Pos();
+ node = ctx.NewCallable(pos, "ToString", { std::move(node) });
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ if (node->IsCallable("String") && to == EDataSlot::Utf8) {
+ if (const auto atom = node->Head().Content(); IsUtf8(atom)) {
+ node = ctx.RenameNode(*node, "Utf8");
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
+
+ if (IsDataTypeNumeric(from) && IsDataTypeNumeric(to)) {
+ {
+ auto current = node;
+ bool negate = false;
+ for (;;) {
+ if (current->IsCallable("Plus")) {
+ current = current->HeadPtr();
+ }
+ else if (current->IsCallable("Minus")) {
+ current = current->HeadPtr();
+ negate = !negate;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (const auto maybeInt = TMaybeNode<TCoIntegralCtor>(current)) {
+ TString atomValue;
+ if (AllowIntegralConversion(maybeInt.Cast(), false, to, &atomValue)) {
+ node = ctx.NewCallable(node->Pos(), expectedType.Cast<TDataExprType>()->GetName(),
+ {ctx.NewAtom(node->Pos(), atomValue, TNodeFlags::Default)});
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
+ }
+
+ if (GetNumericDataTypeLevel(to) < GetNumericDataTypeLevel(from)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ auto castResult = NKikimr::NUdf::GetCastResult(from, to);
+ if (!castResult || *castResult & NKikimr::NUdf::Impossible) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (*castResult != NKikimr::NUdf::ECastOptions::Complete) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ const auto pos = node->Pos();
+ auto type = ExpandType(pos, expectedType, ctx);
+ node = ctx.NewCallable(pos, "Convert", {std::move(node), std::move(type)});
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ auto fromFeatures = NUdf::GetDataTypeInfo(from).Features;
+ auto toFeatures = NUdf::GetDataTypeInfo(to).Features;
+ if ((fromFeatures & NUdf::TzDateType) && (toFeatures & (NUdf::DateType| NUdf::TzDateType)) ||
+ (toFeatures & NUdf::TzDateType) && (fromFeatures & (NUdf::DateType | NUdf::TzDateType))) {
+ node = ctx.Builder(node->Pos())
+ .Callable("Apply")
+ .Callable(0, "Udf")
+ .Atom(0, TString("DateTime2.Make") + expectedType.Cast<TDataExprType>()->GetName())
+ .Seal()
+ .Add(1, node->HeadPtr())
+ .Seal()
+ .Build();
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
+
+ return IGraphTransformer::TStatus::Error;
+}
+
void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexKeys, const TStructExprType& rowType, TExprContext& ctx, TTypeAnnotationContext& types) {
if (!hint) {
return;
@@ -1854,10 +1970,10 @@ void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexK
[&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
output = input;
- auto status = TrySilentConvertTo(output, *unwrapOptional, ctx);
+ auto status = ConvertLiteral(output, *output->GetTypeAnn(), *unwrapOptional, ctx);
if (status == IGraphTransformer::TStatus::Error) {
output = input;
- status = TrySilentConvertTo(output, *columnType, ctx);
+ status = ConvertLiteral(output, *output->GetTypeAnn(), *columnType, ctx);
}
if (status == IGraphTransformer::TStatus::Repeat) {