aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkungurtsev <kungasc@ydb.tech>2024-01-23 15:28:43 +0500
committerGitHub <noreply@github.com>2024-01-23 11:28:43 +0100
commit6e34ae9e554efaba812d785133fd4594008ff631 (patch)
tree2f6dad2c1b99209837aeead0d19d029bb1ee27a4
parentec6c6aadce6a507c6df12a45edba4fca1c389f80 (diff)
downloadydb-6e34ae9e554efaba812d785133fd4594008ff631.tar.gz
KIKIMR-19522 BTreeIndex Charge Keys (#1102)
-rw-r--r--ydb/core/tablet_flat/flat_part_charge_btree_index.h261
-rw-r--r--ydb/core/tablet_flat/flat_part_charge_range.cpp46
-rw-r--r--ydb/core/tablet_flat/flat_part_dump.cpp30
-rw-r--r--ydb/core/tablet_flat/test/libs/table/test_part.h16
-rw-r--r--ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp132
-rw-r--r--ydb/core/tablet_flat/ut/ut_charge.cpp31
-rw-r--r--ydb/core/tablet_flat/ut/ut_part.cpp21
7 files changed, 368 insertions, 169 deletions
diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h
index 5876cc2eea..9faff8d087 100644
--- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h
+++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h
@@ -11,56 +11,110 @@ class TChargeBTreeIndex : public ICharge {
using TBtreeIndexMeta = NPage::TBtreeIndexMeta;
using TRecIdx = NPage::TRecIdx;
using TGroupId = NPage::TGroupId;
+ using TChild = TBtreeIndexNode::TChild;
+
+ // TODO: store PageId only instead of TChild?
+ struct TNodeState : TBtreeIndexNode, TChild {
+ TRowId BeginRowId;
+ TRowId EndRowId;
+
+ TNodeState(TSharedData data, TChild meta, TRowId beginRowId, TRowId endRowId)
+ : TBtreeIndexNode(data)
+ , TChild(meta)
+ , BeginRowId(beginRowId)
+ , EndRowId(endRowId)
+ {
+ }
+ };
public:
TChargeBTreeIndex(IPages *env, const TPart &part, TTagsRef tags, bool includeHistory = false)
: Part(&part)
+ , Scheme(*Part->Scheme)
, Env(env) {
- Y_UNUSED(env);
Y_UNUSED(part);
Y_UNUSED(tags);
Y_UNUSED(includeHistory);
}
public:
- TResult Do(const TCells key1, const TCells key2, TRowId row1, TRowId row2,
+ TResult Do(TCells key1, TCells key2, TRowId row1, TRowId row2,
const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
bool ready = true;
- Y_UNUSED(key1);
- Y_UNUSED(key2);
- Y_UNUSED(keyDefaults);
Y_UNUSED(itemsLimit);
Y_UNUSED(bytesLimit);
const auto& meta = Part->IndexPages.BTreeGroups[0];
+ if (meta.LevelCount == 0) {
+ ready &= HasDataPage(meta.PageId, { });
+ return { ready, true };
+ }
+
if (Y_UNLIKELY(row1 >= meta.RowCount)) {
return { true, true }; // already out of bounds, nothing to precharge
}
if (Y_UNLIKELY(row1 > row2)) {
row2 = row1; // will not go further than row1
}
+ TRowId sliceRow2 = row2;
+ if (Y_UNLIKELY(key1 && key2 && Compare(key1, key2, keyDefaults) > 0)) {
+ key2 = key1; // will not go further than key1
+ }
- TVector<TBtreeIndexNode> level, nextLevel(Reserve(2));
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
- if (height == 0) {
- ready &= TryLoadNode(meta.PageId, nextLevel);
- } else {
- for (ui32 i : xrange<ui32>(level.size())) {
- TRecIdx from = 0, to = level[i].GetKeysCount();
- if (i == 0) {
- from = level[i].Seek(row1);
- }
- if (i + 1 == level.size() && row2 < meta.RowCount) {
- to = level[i].Seek(row2);
- }
- for (TRecIdx j : xrange(from, to + 1)) {
- ready &= TryLoadNode(level[i].GetShortChild(j).PageId, nextLevel);
+ TVector<TNodeState> level(Reserve(3)), nextLevel(Reserve(3));
+ TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
+ TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
+
+ const auto iterateLevel = [&](const auto& tryLoadNext) {
+ for (ui32 i : xrange<ui32>(level.size())) {
+ if (level[i].PageId == key1PageId) {
+ TRecIdx pos = level[i].Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+ key1PageId = level[i].GetShortChild(pos).PageId;
+ if (pos) {
+ // move row1 to the first key >= key1
+ row1 = Max(row1, level[i].GetShortChild(pos - 1).RowCount);
}
}
+ if (level[i].PageId == key2PageId) {
+ TRecIdx pos = level[i].Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+ key2PageId = level[i].GetShortChild(pos).PageId;
+ // move row2 to the first key > key2
+ row2 = Min(row2, level[i].GetShortChild(pos).RowCount);
+ // always charge row2, no matter what keys are
+ row2 = Max(row2, row1);
+ }
+
+ if (level[i].EndRowId <= row1 || level[i].BeginRowId > row2) {
+ continue;
+ }
+
+ TRecIdx from = 0, to = level[i].GetKeysCount();
+ if (level[i].BeginRowId < row1) {
+ from = level[i].Seek(row1);
+ }
+ if (level[i].EndRowId > row2 + 1) {
+ to = level[i].Seek(row2);
+ }
+ for (TRecIdx j : xrange(from, to + 1)) {
+ ready &= tryLoadNext(level[i], j);
+ }
}
+ };
+ const auto tryLoadNode = [&](TNodeState& current, TRecIdx pos) -> bool {
+ return TryLoadNode(current, pos, nextLevel);
+ };
+
+ const auto hasDataPage = [&](TNodeState& current, TRecIdx pos) -> bool {
+ return HasDataPage(current.GetShortChild(pos).PageId, { });
+ };
+
+ ready &= TryLoadRoot(meta, level);
+
+ for (ui32 height = 1; height < meta.LevelCount && ready; height++) {
+ iterateLevel(tryLoadNode);
level.swap(nextLevel);
nextLevel.clear();
}
@@ -70,66 +124,89 @@ public:
return {false, false};
}
- if (meta.LevelCount == 0) {
- ready &= HasDataPage(meta.PageId, { });
- } else {
- for (ui32 i : xrange<ui32>(level.size())) {
- TRecIdx from = 0, to = level[i].GetKeysCount();
- if (i == 0) {
- from = level[i].Seek(row1);
- }
- if (i + 1 == level.size() && row2 < meta.RowCount) {
- to = level[i].Seek(row2);
- }
- for (TRecIdx j : xrange(from, to + 1)) {
- ready &= HasDataPage(level[i].GetShortChild(j).PageId, { });
- }
- }
- }
+ iterateLevel(hasDataPage);
- // TODO: overshot for keys search
- return {ready, false};
+ return {ready, row2 == sliceRow2};
}
- TResult DoReverse(const TCells key1, const TCells key2, TRowId row1, TRowId row2,
+ TResult DoReverse(TCells key1, TCells key2, TRowId row1, TRowId row2,
const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
bool ready = true;
- Y_UNUSED(key1);
- Y_UNUSED(key2);
- Y_UNUSED(keyDefaults);
Y_UNUSED(itemsLimit);
Y_UNUSED(bytesLimit);
const auto& meta = Part->IndexPages.BTreeGroups[0];
+ if (meta.LevelCount == 0) {
+ ready &= HasDataPage(meta.PageId, { });
+ return { ready, true };
+ }
+
if (Y_UNLIKELY(row1 >= meta.RowCount)) {
row1 = meta.RowCount - 1; // start from the last row
}
- if (Y_UNLIKELY(row1 < row2)) {
+ if (Y_UNLIKELY(row2 > row1)) {
row2 = row1; // will not go further than row1
}
+ TRowId sliceRow2 = row2;
+ if (Y_UNLIKELY(key1 && key2 && Compare(key2, key1, keyDefaults) > 0)) {
+ key2 = key1; // will not go further than key1
+ }
// level contains nodes in reverse order
- TVector<TBtreeIndexNode> level, nextLevel(Reserve(2));
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
- if (height == 0) {
- ready &= TryLoadNode(meta.PageId, nextLevel);
- } else {
- for (ui32 i : xrange<ui32>(level.size())) {
- TRecIdx from = level[i].GetKeysCount(), to = 0;
- if (i == 0) {
- from = level[i].Seek(row1);
- }
- if (i + 1 == level.size() && row2 < meta.RowCount) {
- to = level[i].Seek(row2);
- }
- for (TRecIdx j = from + 1; j > to; j--) {
- ready &= TryLoadNode(level[i].GetShortChild(j - 1).PageId, nextLevel);
+ TVector<TNodeState> level(Reserve(3)), nextLevel(Reserve(3));
+ TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
+ TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
+
+ const auto iterateLevel = [&](const auto& tryLoadNext) {
+ for (ui32 i : xrange<ui32>(level.size())) {
+ if (level[i].PageId == key1PageId) {
+ TRecIdx pos = level[i].SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+ key1PageId = level[i].GetShortChild(pos).PageId;
+ // move row1 to the first key <= key1
+ row1 = Min(row1, level[i].GetShortChild(pos).RowCount - 1);
+ }
+ if (level[i].PageId == key2PageId) {
+ TRecIdx pos = level[i].SeekReverse(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+ key2PageId = level[i].GetShortChild(pos).PageId;
+ // move row2 to the first key > key2
+ if (pos) {
+ row2 = Max(row2, level[i].GetShortChild(pos - 1).RowCount - 1);
+ // always charge row1, no matter what keys are
+ row2 = Min(row2, row1);
}
}
+
+ if (level[i].EndRowId <= row2 || level[i].BeginRowId > row1) {
+ continue;
+ }
+
+ TRecIdx from = level[i].GetKeysCount(), to = 0;
+ if (level[i].EndRowId > row1 + 1) {
+ from = level[i].Seek(row1);
+ }
+ if (level[i].BeginRowId < row2) {
+ to = level[i].Seek(row2);
+ }
+ for (TRecIdx j = from + 1; j > to; j--) {
+ ready &= tryLoadNext(level[i], j - 1);
+ }
}
+ };
+
+ const auto tryLoadNode = [&](TNodeState& current, TRecIdx pos) -> bool {
+ return TryLoadNode(current, pos, nextLevel);
+ };
+
+ const auto hasDataPage = [&](TNodeState& current, TRecIdx pos) -> bool {
+ return HasDataPage(current.GetShortChild(pos).PageId, { });
+ };
+
+ ready &= TryLoadRoot(meta, level);
+ for (ui32 height = 1; height < meta.LevelCount && ready; height++) {
+ iterateLevel(tryLoadNode);
level.swap(nextLevel);
nextLevel.clear();
}
@@ -139,25 +216,9 @@ public:
return {false, false};
}
- if (meta.LevelCount == 0) {
- ready &= HasDataPage(meta.PageId, { });
- } else {
- for (ui32 i : xrange<ui32>(level.size())) {
- TRecIdx from = level[i].GetKeysCount(), to = 0;
- if (i == 0) {
- from = level[i].Seek(row1);
- }
- if (i + 1 == level.size() && row2 < meta.RowCount) {
- to = level[i].Seek(row2);
- }
- for (TRecIdx j = from + 1; j > to; j--) {
- ready &= HasDataPage(level[i].GetShortChild(j - 1).PageId, { });
- }
- }
- }
+ iterateLevel(hasDataPage);
- // TODO: overshot for keys search
- return {ready, false};
+ return {ready, row2 == sliceRow2};
}
private:
@@ -165,17 +226,53 @@ private:
return Env->TryGetPage(Part, pageId, groupId);
}
- bool TryLoadNode(TPageId pageId, TVector<TBtreeIndexNode>& level) const noexcept {
- auto page = Env->TryGetPage(Part, pageId);
- if (page) {
- level.emplace_back(*page);
- return true;
+ bool TryLoadRoot(const TBtreeIndexMeta& meta, TVector<TNodeState>& level) const noexcept {
+ auto page = Env->TryGetPage(Part, meta.PageId);
+ if (!page) {
+ return false;
}
- return false;
+
+ level.emplace_back(*page, meta, 0, meta.RowCount);
+ return true;
+ }
+
+ bool TryLoadNode(TNodeState& current, TRecIdx pos, TVector<TNodeState>& level) const noexcept {
+ Y_ABORT_UNLESS(pos < current.GetChildrenCount(), "Should point to some child");
+
+ auto child = current.GetChild(pos);
+
+ auto page = Env->TryGetPage(Part, child.PageId);
+ if (!page) {
+ return false;
+ }
+
+ TRowId beginRowId = pos ? current.GetChild(pos - 1).RowCount : current.BeginRowId;
+ TRowId endRowId = child.RowCount;
+
+ level.emplace_back(*page, child, beginRowId, endRowId);
+ return true;
+ }
+
+ int Compare(TCells left, TCells right, const TKeyCellDefaults &keyDefaults) const noexcept
+ {
+ Y_DEBUG_ABORT_UNLESS(left, "Empty keys should be handled separately");
+ Y_DEBUG_ABORT_UNLESS(right, "Empty keys should be handled separately");
+
+ for (TPos it = 0; it < Min(left.size(), right.size(), keyDefaults.Size()); it++) {
+ if (int cmp = CompareTypedCells(left[it], right[it], keyDefaults.Types[it])) {
+ return cmp;
+ }
+ }
+
+ return left.size() == right.size()
+ ? 0
+ // Missing point cells are filled with a virtual +inf
+ : (left.size() > right.size() ? -1 : 1);
}
private:
const TPart* const Part;
+ const TPartScheme &Scheme;
IPages* const Env;
};
diff --git a/ydb/core/tablet_flat/flat_part_charge_range.cpp b/ydb/core/tablet_flat/flat_part_charge_range.cpp
index 3ba7325fca..2906bcd8ab 100644
--- a/ydb/core/tablet_flat/flat_part_charge_range.cpp
+++ b/ydb/core/tablet_flat/flat_part_charge_range.cpp
@@ -7,20 +7,14 @@ bool ChargeRange(IPages *env, const TCells key1, const TCells key2,
const TRun &run, const TKeyCellDefaults &keyDefaults, TTagsRef tags,
ui64 items, ui64 bytes, bool includeHistory) noexcept
{
- if (run.size() == 1) {
- auto pos = run.begin();
- TRowId row1 = pos->Slice.BeginRowId();
- TRowId row2 = pos->Slice.EndRowId() - 1;
- return CreateCharge(env, *pos->Part, tags, includeHistory)->Do(key1, key2, row1, row2, keyDefaults, items, bytes).Ready;
- }
-
bool ready = true;
auto pos = run.LowerBound(key1);
if (pos == run.end())
return true;
- bool fromStart = TSlice::CompareSearchKeyFirstKey(key1, pos->Slice, keyDefaults) <= 0;
+ // key1 <= FirstKey
+ bool chargeFromSliceFirstRow = TSlice::CompareSearchKeyFirstKey(key1, pos->Slice, keyDefaults) <= 0;
while (pos != run.end()) {
TRowId row1 = pos->Slice.BeginRowId();
@@ -29,30 +23,32 @@ bool ChargeRange(IPages *env, const TCells key1, const TCells key2,
const int cmp = TSlice::CompareLastKeySearchKey(pos->Slice, key2, keyDefaults);
TArrayRef<const TCell> key1r;
- if (!fromStart) {
+ if (!chargeFromSliceFirstRow) {
key1r = key1;
}
TArrayRef<const TCell> key2r;
- if (cmp > 0 /* slice->LastKey > key2 */) {
+ if (cmp > 0) {
+ // key2 < LastKey
key2r = key2;
}
auto r = CreateCharge(env, *pos->Part, tags, includeHistory)->Do(key1r, key2r, row1, row2, keyDefaults, items, bytes);
ready &= r.Ready;
- if (cmp >= 0 /* slice->LastKey >= key2 */) {
+ if (cmp >= 0) {
+ // key2 <= LastKey
if (r.Overshot && ++pos != run.end()) {
- // Unfortunately key > key2 might be at the start of the next slice
+ // Unfortunately first key > key2 might be at the start of the next slice
TRowId firstRow = pos->Slice.BeginRowId();
// Precharge the first row on the next slice
- CreateCharge(env, *pos->Part, tags, includeHistory)->Do(firstRow, firstRow, keyDefaults, items, bytes);
+ ready &= CreateCharge(env, *pos->Part, tags, includeHistory)->Do(firstRow, firstRow, keyDefaults, items, bytes);
}
break;
}
// Will consume this slice before encountering key2
- fromStart = true;
+ chargeFromSliceFirstRow = true;
++pos;
}
@@ -63,20 +59,14 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
const TRun &run, const TKeyCellDefaults &keyDefaults, TTagsRef tags,
ui64 items, ui64 bytes, bool includeHistory) noexcept
{
- if (run.size() == 1) {
- auto pos = run.begin();
- TRowId row1 = pos->Slice.EndRowId() - 1;
- TRowId row2 = pos->Slice.BeginRowId();
- return CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(key1, key2, row1, row2, keyDefaults, items, bytes).Ready;
- }
-
bool ready = true;
auto pos = run.LowerBoundReverse(key1);
if (pos == run.end())
return true;
- bool fromEnd = TSlice::CompareLastKeySearchKey(pos->Slice, key1, keyDefaults) <= 0;
+ // LastKey <= key1
+ bool chargeFromSliceLastRow = TSlice::CompareLastKeySearchKey(pos->Slice, key1, keyDefaults) <= 0;
for (;;) {
TRowId row1 = pos->Slice.EndRowId() - 1;
@@ -86,11 +76,12 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
const int cmp = key2 ? TSlice::CompareSearchKeyFirstKey(key2, pos->Slice, keyDefaults) : -1;
TArrayRef<const TCell> key1r;
- if (!fromEnd) {
+ if (!chargeFromSliceLastRow) {
key1r = key1;
}
TArrayRef<const TCell> key2r;
- if (cmp > 0 /* key2 > slice->FirstKey */) {
+ if (cmp > 0) {
+ // FirstKey < key2
key2r = key2;
}
@@ -102,19 +93,20 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
}
if (cmp >= 0 /* key2 >= slice->FirstKey */) {
+ // FirstKey <= key2
if (r.Overshot) {
--pos;
- // Unfortunately key <= key2 might be at the end of the previous slice
+ // Unfortunately first key <= key2 might be at the end of the previous slice
TRowId lastRow = pos->Slice.EndRowId() - 1;
// Precharge the last row on the previous slice
- CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(lastRow, lastRow, keyDefaults, items, bytes);
+ ready &= CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(lastRow, lastRow, keyDefaults, items, bytes);
}
break;
}
// Will consume this slice before encountering key2
- fromEnd = true;
+ chargeFromSliceLastRow = true;
--pos;
}
diff --git a/ydb/core/tablet_flat/flat_part_dump.cpp b/ydb/core/tablet_flat/flat_part_dump.cpp
index d7a83f0373..f338d251f9 100644
--- a/ydb/core/tablet_flat/flat_part_dump.cpp
+++ b/ydb/core/tablet_flat/flat_part_dump.cpp
@@ -122,7 +122,7 @@ namespace {
Out
<< " + Index{" << (ui16)label.Type << " rev "
<< label.Format << ", " << label.Size << "b}"
- << " " << index->Count << " rec" << Endl
+ << " " << index->Count + (index.GetLastKeyRecord() ? 1 : 0) << " rec" << Endl
<< " | Page Row Bytes (";
for (auto off : xrange(part.Scheme->Groups[0].KeyTypes.size())) {
@@ -133,18 +133,8 @@ namespace {
Out << ")" << Endl;
- for (auto iter = index->Begin(); iter; iter++) {
+ auto printIndexKey = [&](const NPage::TIndex::TRecord* record) {
key.clear();
-
- if (depth < 2 && iter.Off() >= 10) {
- Out
- << " | -- skipped " << index->Count - iter.Off()
- << " entries, depth level " << depth << Endl;
-
- break;
- }
-
- auto record = iter.GetRecord();
for (const auto &info: part.Scheme->Groups[0].ColsKeyIdx)
key.push_back(record->Cell(info));
@@ -161,6 +151,22 @@ namespace {
Key(key, *part.Scheme);
Out << Endl;
+ };
+
+ for (auto iter = index->Begin(); iter; iter++) {
+ if (depth < 2 && iter.Off() >= 10) {
+ Out
+ << " | -- skipped " << index->Count - iter.Off()
+ << " entries, depth level " << depth << Endl;
+
+ break;
+ }
+
+ printIndexKey(iter.GetRecord());
+ }
+
+ if (index.GetLastKeyRecord()) {
+ printIndexKey(index.GetLastKeyRecord());
}
}
diff --git a/ydb/core/tablet_flat/test/libs/table/test_part.h b/ydb/core/tablet_flat/test/libs/table/test_part.h
index f67cb654ca..0247155753 100644
--- a/ydb/core/tablet_flat/test/libs/table/test_part.h
+++ b/ydb/core/tablet_flat/test/libs/table/test_part.h
@@ -176,7 +176,7 @@ namespace NTest {
return index.GetLastRecord();
}
- inline const TPartIndexIt::TRecord * GetRecord(const TPartStore& part, TPageId pageIndex) {
+ inline const TPartIndexIt::TRecord * GetRecord(const TPartStore& part, ui32 pageIndex) {
TTestEnv env;
TPartIndexIt index(&part, &env, { });
@@ -187,6 +187,20 @@ namespace NTest {
return index.GetRecord();
}
+
+ inline TPageId GetFirstPageId(const TPartStore& part) {
+ TTestEnv env;
+ TPartIndexIt index(&part, &env, { });
+ index.Seek(0);
+ return index.GetPageId();
+ }
+
+ inline TPageId GetLastPageId(const TPartStore& part) {
+ TTestEnv env;
+ TPartIndexIt index(&part, &env, { });
+ index.Seek(index.GetEndRowId() - 1);
+ return index.GetPageId();
+ }
}
}}}
diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
index 3c5acdb18e..be356402fe 100644
--- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
+++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
@@ -2,6 +2,7 @@
#include "flat_part_btree_index_iter.h"
#include "flat_part_charge.h"
#include "flat_part_charge_btree_index.h"
+#include "flat_part_charge_range.h"
#include "flat_part_iter_multi.h"
#include "test/libs/table/test_writer.h"
#include <ydb/core/tablet_flat/test/libs/rows/layout.h>
@@ -38,7 +39,7 @@ namespace {
TMap<TGroupId, TSet<TPageId>> Touched;
};
- void AssertLoadTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message) {
+ void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, bool allowFirstLastPageDifference = false) {
TSet<TGroupId> groupIds;
for (const auto &c : {bTree.Loaded, flat.Loaded}) {
for (const auto &g : c) {
@@ -59,7 +60,17 @@ namespace {
}
}
- UNIT_ASSERT_VALUES_EQUAL_C(flatDataPages, bTreeDataPages, message);
+ // Note: it's possible that B-Tree index touches extra first / last page because it doesn't have boundary keys
+ // this should be resolved using slices (see ChargeRange)
+ if (allowFirstLastPageDifference) {
+ for (auto additionalPageId : {IndexTools::GetFirstPageId(part), IndexTools::GetLastPageId(part)}) {
+ if (bTreeDataPages.contains(additionalPageId)) {
+ flatDataPages.insert(additionalPageId);
+ }
+ }
+ } else {
+ UNIT_ASSERT_VALUES_EQUAL_C(flatDataPages, bTreeDataPages, message);
+ }
}
}
@@ -111,7 +122,7 @@ namespace {
}
return TSerializedCellVec(key);
};
- auto add = [&](TRowId pageIndex1 /*inclusive*/, TRowId pageIndex2 /*exclusive*/) {
+ auto add = [&](ui32 pageIndex1 /*inclusive*/, ui32 pageIndex2 /*exclusive*/) {
TSlice slice;
slice.FirstInclusive = true;
slice.FirstRowId = pageIndex1 * 2;
@@ -366,12 +377,12 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
Y_UNREACHABLE();
}
- bool DoChargeKeys(ICharge& charge, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
+ bool DoChargeKeys(const TPartStore& part, ICharge& charge, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) {
while (true) {
auto result = reverse
- ? charge.DoReverse(key1, key2, 0, Max<TRowId>(), keyDefaults, itemsLimit, bytesLimit)
- : charge.Do(key1, key2, 0, Max<TRowId>(), keyDefaults, itemsLimit, bytesLimit);
+ ? charge.DoReverse(key1, key2, part.Stat.Rows - 1, 0, keyDefaults, itemsLimit, bytesLimit)
+ : charge.Do(key1, key2, 0, part.Stat.Rows - 1, keyDefaults, itemsLimit, bytesLimit);
if (result.Ready) {
return result.Overshot;
}
@@ -382,8 +393,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults, bool reverse) {
- for (TRowId rowId1 : xrange(part.Stat.Rows + 1)) {
- for (TRowId rowId2 : xrange(part.Stat.Rows + 1)) {
+ for (TRowId rowId1 : xrange(part.Stat.Rows)) {
+ for (TRowId rowId2 : xrange(part.Stat.Rows)) {
TTouchEnv bTreeEnv, flatEnv;
TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true);
TCharge flat(&flatEnv, part, tags, true);
@@ -391,7 +402,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2;
DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, 0, 0, reverse, *keyDefaults, message);
DoChargeRowId(flat, flatEnv, rowId1, rowId2, 0, 0, reverse, *keyDefaults, message);
- AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+ AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
}
}
}
@@ -418,15 +429,11 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
message << ")";
- bool bTreeOvershot = DoChargeKeys(bTree, bTreeEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
- bool flatOvershot = DoChargeKeys(flat, flatEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
+ bool bTreeOvershot = DoChargeKeys(part, bTree, bTreeEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
+ bool flatOvershot = DoChargeKeys(part, flat, flatEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
- // TODO
- // UNIT_ASSERT_VALUES_EQUAL_C(bTreeOvershot, flatOvershot, message);
- Y_UNUSED(bTreeOvershot);
- Y_UNUSED(flatOvershot);
-
- AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+ UNIT_ASSERT_VALUES_EQUAL_C(bTreeOvershot, flatOvershot, message);
+ AssertLoadedTheSame(part, bTreeEnv, flatEnv, message, true);
}
}
}
@@ -444,10 +451,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get(), false);
CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get(), true);
- // TODO: isn't working yet
- // CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), false);
- // CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), true);
- // TODO: mixed
+ CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), false);
+ CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), true);
}
Y_UNIT_TEST(NoNodes) {
@@ -470,7 +475,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
UNIT_ASSERT_VALUES_EQUAL_C(bTree.GetRowId(), flat.GetRowId(), message);
}
- EReady SeekKey(TRunIt& iter, TTouchEnv& env, ESeek seek, bool reverse, TCells key, const TString& message, ui32 failsAllowed = 10) {
+ EReady Seek(TRunIt& iter, TTouchEnv& env, ESeek seek, bool reverse, TCells key, const TString& message, ui32 failsAllowed = 10) {
return Retry([&]() {
return reverse ? iter.SeekReverse(key, seek) : iter.Seek(key, seek);
}, env, message, failsAllowed);
@@ -482,7 +487,22 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
}, env, message, failsAllowed);
}
- void CheckIterateKey(const TPartEggs& eggs) {
+ void Charge(const TRun &run, const TVector<TTag> tags, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
+ bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) {
+ while (true) {
+ auto result = reverse
+ ? ChargeRangeReverse(&env, key1, key2, run, keyDefaults, tags, itemsLimit, bytesLimit, true)
+ : ChargeRange(&env, key1, key2, run, keyDefaults, tags, itemsLimit, bytesLimit, true);
+ if (result) {
+ return;
+ }
+ env.LoadTouched(true);
+ UNIT_ASSERT_C(failsAllowed--, "Too many fails " + message);
+ }
+ Y_UNREACHABLE();
+ }
+
+ void CheckIterate(const TPartEggs& eggs) {
const auto part = *eggs.Lone();
TRun btreeRun(*eggs.Scheme->Keys), flatRun(*eggs.Scheme->Keys);
@@ -512,18 +532,18 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
TRunIt bTree(btreeRun, tags, eggs.Scheme->Keys, &bTreeEnv);
{
- TStringBuilder message = TStringBuilder() << (reverse ? "SeekKeyReverse" : "SeekKey") << "(" << seek << ") ";
+ TStringBuilder message = TStringBuilder() << (reverse ? "IterateReverse" : "Iterate") << "(" << seek << ") ";
for (auto c : key) {
message << c.AsValue<ui32>() << " ";
}
- EReady bTreeReady = SeekKey(bTree, bTreeEnv, seek, reverse, key, message);
- EReady flatReady = SeekKey(flat, flatEnv, seek, reverse, key, message);
+ EReady bTreeReady = Seek(bTree, bTreeEnv, seek, reverse, key, message);
+ EReady flatReady = Seek(flat, flatEnv, seek, reverse, key, message);
AssertEqual(bTree, bTreeReady, flat, flatReady, message);
- AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+ AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
}
for (ui32 steps = 1; steps <= 10; steps++) {
- TStringBuilder message = TStringBuilder() << (reverse ? "SeekKeyReverse" : "SeekKey") << "(" << seek << ") ";
+ TStringBuilder message = TStringBuilder() << (reverse ? "IterateReverse" : "Iterate") << "(" << seek << ") ";
for (auto c : key) {
message << c.AsValue<ui32>() << " ";
}
@@ -531,7 +551,58 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
EReady bTreeReady = Next(bTree, bTreeEnv, reverse, message);
EReady flatReady = Next(flat, flatEnv, reverse, message);
AssertEqual(bTree, bTreeReady, flat, flatReady, message);
- AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+ AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void CheckCharge(const TPartEggs& eggs) {
+ const auto part = *eggs.Lone();
+
+ TRun btreeRun(*eggs.Scheme->Keys), flatRun(*eggs.Scheme->Keys);
+ auto flatPart = part.CloneWithEpoch(part.Epoch);
+ for (auto& slice : *part.Slices) {
+ btreeRun.Insert(eggs.Lone(), slice);
+ auto pages = (TVector<TBtreeIndexMeta>*)&flatPart->IndexPages.BTreeGroups;
+ pages->clear();
+ pages = (TVector<TBtreeIndexMeta>*)&flatPart->IndexPages.BTreeHistoric;
+ pages->clear();
+ flatRun.Insert(flatPart, slice);
+ }
+
+ auto tags = TVector<TTag>();
+ for (auto c : eggs.Scheme->Cols) {
+ tags.push_back(c.Tag);
+ }
+
+ for (bool reverse : {false, true}) {
+ for (ui32 firstCellKey1 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
+ for (ui32 secondCellKey1 : xrange<ui32>(0, 14)) {
+ for (ui32 firstCellKey2 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
+ for (ui32 secondCellKey2 : xrange<ui32>(0, 14)) {
+ TVector<TCell> key1 = MakeKey(firstCellKey1, secondCellKey1);
+ TVector<TCell> key2 = MakeKey(firstCellKey2, secondCellKey2);
+
+ TTouchEnv bTreeEnv, flatEnv;
+
+ TStringBuilder message = TStringBuilder() << (reverse ? "ChargeReverse " : "Charge ") << "(";
+ for (auto c : key1) {
+ message << c.AsValue<ui32>() << " ";
+ }
+ message << ") (";
+ for (auto c : key2) {
+ message << c.AsValue<ui32>() << " ";
+ }
+ message << ")";
+
+ // TODO: limits
+ Charge(btreeRun, tags, bTreeEnv, key1, key2, 0, 0, reverse, *eggs.Scheme->Keys, message);
+ Charge(flatRun, tags, flatEnv, key1, key2, 0, 0, reverse, *eggs.Scheme->Keys, message);
+
+ AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
}
}
}
@@ -543,7 +614,8 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
TPartEggs eggs = MakePart(slices, levels);
const auto part = *eggs.Lone();
- CheckIterateKey(eggs);
+ CheckIterate(eggs);
+ CheckCharge(eggs);
}
Y_UNIT_TEST(NoNodes_SingleSlice) {
diff --git a/ydb/core/tablet_flat/ut/ut_charge.cpp b/ydb/core/tablet_flat/ut/ut_charge.cpp
index ca0bf896af..e5072d3a44 100644
--- a/ydb/core/tablet_flat/ut/ut_charge.cpp
+++ b/ydb/core/tablet_flat/ut/ut_charge.cpp
@@ -516,16 +516,18 @@ Y_UNIT_TEST_SUITE(Charge) {
{TGroupId{2}, {}}
});
+ // key 0 transforms into row id 0 because it's before the slice first key 1
me.To(101).CheckByKeys(0, 9, 0, TMap<TGroupId, TArr>{
{TGroupId{0}, {0, 1, 2, 3_I}},
- {TGroupId{1}, {0_g, 1, 2, 3_g}},
- {TGroupId{2}, {0_g, 1_g, 2_g, 3, 4, 5, 6_g}}
+ {TGroupId{1}, {0, 1, 2, 3_g}},
+ {TGroupId{2}, {0, 1, 2, 3, 4, 5, 6_g}}
});
+ // key 1 also transforms into row id 0 because it's before the slice first key 1
me.To(102).CheckByKeys(1, 9, 0, TMap<TGroupId, TArr>{
{TGroupId{0}, {0, 1, 2, 3_I}},
- {TGroupId{1}, {0_g, 1, 2, 3_g}},
- {TGroupId{2}, {0_g, 1_g, 2_g, 3, 4, 5, 6_g}}
+ {TGroupId{1}, {0, 1, 2, 3_g}},
+ {TGroupId{2}, {0, 1, 2, 3, 4, 5, 6_g}}
});
me.To(103).CheckByKeys(2, 9, 0, TMap<TGroupId, TArr>{
@@ -688,10 +690,11 @@ Y_UNIT_TEST_SUITE(Charge) {
{TGroupId{2}, {}}
});
+ // key 35 transforms into row id 26 because it's after the slice last key 35
me.To(301).CheckByKeys(27, 35, 0, TMap<TGroupId, TArr>{
{TGroupId{0}, {6, 7, 8}},
- {TGroupId{1}, {10, 11, 12_g, 13_g}},
- {TGroupId{2}, {20_g, 21, 22, 23, 24_g, 25_g, 26_g}}
+ {TGroupId{1}, {10, 11, 12, 13}},
+ {TGroupId{2}, {20_g, 21, 22, 23, 24, 25, 26}}
});
me.To(302).CheckByKeys(27, 34, 0, TMap<TGroupId, TArr>{
@@ -801,9 +804,11 @@ Y_UNIT_TEST_SUITE(Charge) {
/*_ 1xx: custom spanned loads scenarios */
- me.To(101).CheckByKeys(0, 35, 8 /* rows */, { 0, 1, 2, 3_I });
- me.To(102).CheckByKeys(0, 35, 11 /* rows */, { 0, 1, 2, 3, 4_I });
- me.To(103).CheckByKeys(0, 35, 14 /* rows */, { 0, 1, 2, 3, 4, 5_I });
+ // key 0 transforms into row id 0 because it's before the slice first key 1
+ me.To(101).CheckByKeys(0, 35, 8 /* rows */, { 0, 1, 2 });
+ me.To(102).CheckByKeys(0, 35, 11 /* rows */, { 0, 1, 2, 3 });
+ me.To(103).CheckByKeys(0, 35, 14 /* rows */, { 0, 1, 2, 3, 4 });
+
me.To(104).CheckByKeys(3, 35, 5 /* rows */, { 0, 1, 2 });
me.To(105).CheckByKeys(3, 35, 6 /* rows */, { 0, 1, 2, 3_I });
me.To(106).CheckByKeys(4, 35, 6 /* rows */, { 0, 1, 2, 3 });
@@ -815,7 +820,7 @@ Y_UNIT_TEST_SUITE(Charge) {
/*_ 2xx: one row charge limit on two page */
- for (const ui16 page : xrange(4)) {
+ for (const ui16 page : xrange(1, 5)) {
const TArr span1{ page, operator""_I(page + 1) };
const TArr span2{ page, page + 1 };
@@ -860,9 +865,10 @@ Y_UNIT_TEST_SUITE(Charge) {
{TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2_g}}
});
+ // key 1 transforms into row id 0 because it's before the slice first key 1
me.To(104).CheckByKeysReverse(15, 1, 0, TMap<TGroupId, TArr>{
{TGroupId{0}, {3, 2, 1, 0}},
- {TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2_g, 1_g, 0_g}}
+ {TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2, 1, 0}}
});
me.To(105).CheckByKeysReverse(15, 0, 0, TMap<TGroupId, TArr>{
@@ -890,9 +896,10 @@ Y_UNIT_TEST_SUITE(Charge) {
{TGroupId{2}, {26_g, 25_g, 24_g}}
});
+ // key 35 transforms into row id 26 because it's after the slice last key 35
me.To(110).CheckByKeysReverse(35, 32, 0, TMap<TGroupId, TArr>{
{TGroupId{0}, {8, 7, 6_I}},
- {TGroupId{2}, {26_g, 25_g, 24_g}}
+ {TGroupId{2}, {26, 25, 24}}
});
me.To(111).CheckByKeysReverse(4, 1, 0, TMap<TGroupId, TArr>{
diff --git a/ydb/core/tablet_flat/ut/ut_part.cpp b/ydb/core/tablet_flat/ut/ut_part.cpp
index 9d5c21c447..51203cb2b0 100644
--- a/ydb/core/tablet_flat/ut/ut_part.cpp
+++ b/ydb/core/tablet_flat/ut/ut_part.cpp
@@ -952,7 +952,7 @@ Y_UNIT_TEST_SUITE(TPart) {
return TSerializedCellVec(key);
};
- auto slices = MakeIntrusive<TSlices>();
+ TSlices slices;
for (size_t rowId = 0; rowId < fullRows.size();) {
TSlice slice;
slice.FirstInclusive = true;
@@ -963,16 +963,25 @@ Y_UNIT_TEST_SUITE(TPart) {
slice.LastRowId = rowId + RandomNumber<ui32>(2) + 1;
if (slice.LastRowId < fullRows.size())
slice.LastKey = getKey(IndexTools::GetRecord(*cutPartTmp, slice.LastRowId));
- slices->push_back(slice);
+ slices.push_back(slice);
rowId = slice.LastRowId;
}
Cerr << "======= SLICES =======" << Endl;
- slices->Describe(Cerr);
+ slices.Describe(Cerr);
Cerr << Endl;
- TCheckIt cutWrap(cutCook.Finish(), { new TTouchEnv() }, slices), fullWrap(fullCook.Finish(), { new TTouchEnv() });
- TCheckReverseIt cutWrapR(cutCookR.Finish(), { new TTouchEnv() }, slices), fullWrapR(fullCookR.Finish(), { new TTouchEnv() });
+ auto cutEggs = cutCook.Finish(), cutEggsR = cutCookR.Finish();
+ for (auto& eggs : {cutEggs, cutEggsR}) {
+ auto partSlices = (TSlices*)eggs.Lone()->Slices.Get();
+ partSlices->clear();
+ for (auto s : slices) {
+ partSlices->push_back(s);
+ }
+ }
+
+ TCheckIt cutWrap(cutEggs, { new TTouchEnv() }), fullWrap(fullCook.Finish(), { new TTouchEnv() });
+ TCheckReverseIt cutWrapR(cutEggsR, { new TTouchEnv() }), fullWrapR(fullCookR.Finish(), { new TTouchEnv() });
auto cutPart = (*cutWrap).Eggs.Lone();
auto fullPart = (*fullWrap).Eggs.Lone();
@@ -984,6 +993,8 @@ Y_UNIT_TEST_SUITE(TPart) {
Cerr << DumpPart(*fullPart, 2) << Endl;
UNIT_ASSERT_GT(fullPart->IndexesRawSize, cutPart->IndexesRawSize);
+ UNIT_ASSERT_GT(cutPart->Slices->size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(fullPart->Slices->size(), 1);
for (auto r : fullRows) {
cutWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));