summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkungurtsev <[email protected]>2024-02-09 15:24:27 +0500
committerGitHub <[email protected]>2024-02-09 11:24:27 +0100
commitbb72994139ec070703dad699862e28099476596a (patch)
tree98efb4a9b4ade077685e81dd198c049b3364ff8f
parenta642f2b67b562368be6037e5df7deabdef058876 (diff)
BTreeIndex Bugfix partially-loaded tree (#1674)
-rw-r--r--ydb/core/tablet_flat/flat_part_charge_btree_index.h424
-rw-r--r--ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp82
2 files changed, 256 insertions, 250 deletions
diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h
index ee091ff8e09..4207f85f69f 100644
--- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h
+++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h
@@ -16,20 +16,25 @@ class TChargeBTreeIndex : public ICharge {
struct TChildState {
TPageId PageId;
- TRowId BeginRowId;
- TRowId EndRowId;
+ TRowId BeginRowId, EndRowId;
+ TRowId PrevItems, Items;
+ ui64 PrevBytes, Bytes;
- TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId)
+ TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize)
: PageId(pageId)
, BeginRowId(beginRowId)
, EndRowId(endRowId)
+ , PrevItems(prevItems)
+ , Items(items)
+ , PrevBytes(prevDataSize)
+ , Bytes(dataSize)
{
}
};
struct TNodeState : TChildState, TBtreeIndexNode {
- TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId)
- : TChildState(pageId, beginRowId, endRowId)
+ TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize)
+ : TChildState(pageId, beginRowId, endRowId, prevItems, items, prevDataSize, dataSize)
, TBtreeIndexNode(data)
{
}
@@ -63,17 +68,13 @@ public:
TResult Do(TCells key1, TCells key2, TRowId beginRowId, TRowId endRowId,
const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
endRowId++; // current interface accepts inclusive row2 bound
- Y_ABORT_UNLESS(beginRowId < endRowId);
-
- bool ready = true, overshot = true;
- bool hasValidRowsRange = Groups || IncludeHistory; // false value means that beginRowId, endRowId are invalid and shouldn't be used
- ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups
- TRowId beginBytesLimitRowId = Max<TRowId>(); // first unloaded probably needed row
+ bool ready = true, overshot = true, hasValidRowsRange = Groups || IncludeHistory;
+ const TRowId sliceBeginRowId = beginRowId, sliceEndRowId = endRowId;
const auto& meta = Part->IndexPages.BTreeGroups[0];
+ Y_ABORT_UNLESS(beginRowId < endRowId);
Y_ABORT_UNLESS(endRowId <= meta.RowCount);
- const TRowId sliceEndRowId = endRowId;
if (Y_UNLIKELY(key1 && key2 && Compare(key1, key2, keyDefaults) > 0)) {
key2 = key1; // will not go further than key1
hasValidRowsRange = false;
@@ -82,71 +83,47 @@ public:
TVector<TNodeState> level, nextLevel(::Reserve(3));
TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
- ui64 key1Items = 0, prevKey1Items = 0;
+ TChildState firstChild = BuildRootChildState(meta);
const auto iterateLevel = [&](const auto& tryHandleChild) {
// tryHandleChild may update them, copy for simplicity
- // always load beginRowId regardless of keys
- const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1);
- const TChild* levelFirstChild = nullptr;
-
+ const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId;
+
for (const auto &node : level) {
if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) {
continue;
}
-
TRecIdx from = 0, to = node.GetChildrenCount();
- if (node.BeginRowId < levelBeginRowId) {
+ if (node.BeginRowId <= levelBeginRowId) {
from = node.Seek(levelBeginRowId);
+ if (firstChild.PageId != Max<TPageId>()) { // still valid and should be updated
+ auto& child = node.GetChild(from);
+ auto prevChild = from ? node.GetChildRef(from - 1) : nullptr;
+ firstChild = BuildChildState(node, child, prevChild);
+ }
}
if (node.EndRowId > levelEndRowId) {
to = node.Seek(levelEndRowId - 1) + 1;
}
+
for (TRecIdx pos : xrange(from, to)) {
- auto child = node.GetChildRef(pos);
+ auto child = node.GetChild(pos);
auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr;
- TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
- TRowId childEndRowId = child->RowCount;
- ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId));
- if (itemsLimit || bytesLimit) {
- if (!levelFirstChild) {
- // do not apply limits on the first child because beginRowId/key1 position is uncertain
- levelFirstChild = child;
- } else {
- if (itemsLimit) {
- ui64 items = child->GetNonErasedRowCount() - levelFirstChild->GetNonErasedRowCount();
- if (LimitExceeded(items, itemsLimit)) {
- overshot = false;
- return;
- }
- }
- if (bytesLimit) {
- ui64 bytes = child->DataSize - levelFirstChild->DataSize;
- if (LimitExceeded(bytes, bytesLimit)) {
- endRowId = Min(endRowId, childEndRowId);
- overshot = false;
- return;
- }
- }
- }
+ auto childState = BuildChildState(node, child, prevChild);
+ if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit) || LimitExceeded(firstChild.Bytes, childState.PrevBytes, bytesLimit)) {
+ endRowId = Min(endRowId, childState.BeginRowId);
+ return;
}
+ ready &= tryHandleChild(childState);
}
}
};
const auto skipUnloadedRows = [&](const TChildState& child) {
+ if (child.PageId == firstChild.PageId) {
+ firstChild.PageId = Max<TPageId>(); // mark first child unloaded
+ }
if (child.PageId == key1PageId) {
- if (hasValidRowsRange && chargeGroupsItemsLimit) {
- ui64 unloadedItems = key1Items - prevKey1Items;
- if (unloadedItems < chargeGroupsItemsLimit) {
- chargeGroupsItemsLimit -= unloadedItems;
- } else {
- hasValidRowsRange = false;
- }
- }
- if (hasValidRowsRange && bytesLimit) {
- beginBytesLimitRowId = Max(beginRowId, child.BeginRowId);
- }
beginRowId = Max(beginRowId, child.EndRowId);
}
if (child.PageId == key2PageId) {
@@ -154,19 +131,16 @@ public:
}
};
- const auto tryHandleNode = [&](TChildState child) -> bool {
- if (child.PageId == key1PageId || child.PageId == key2PageId) {
+ const auto tryHandleNode = [&](const TChildState& child) -> bool {
+ if (child.PageId == firstChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) {
if (TryLoadNode(child, nextLevel)) {
const auto& node = nextLevel.back();
if (child.PageId == key1PageId) {
TRecIdx pos = node.Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
auto& key1Child = node.GetChild(pos);
key1PageId = key1Child.PageId;
- key1Items = key1Child.GetNonErasedRowCount();
if (pos) {
- auto& prevKey1Child = node.GetChild(pos - 1);
- prevKey1Items = prevKey1Child.GetNonErasedRowCount();
- beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1
+ beginRowId = Max(beginRowId, node.GetChild(pos - 1).RowCount); // move beginRowId to the first key >= key1
}
}
if (child.PageId == key2PageId) {
@@ -174,8 +148,9 @@ public:
auto& key2Child = node.GetChild(pos);
key2PageId = key2Child.PageId;
endRowId = Min(endRowId, key2Child.RowCount + 1); // move endRowId - 1 to the first key > key2
- if (key2Child.RowCount <= beginRowId) {
+ if (key2Child.RowCount <= sliceBeginRowId) {
hasValidRowsRange = false; // key2 is before current slice
+ endRowId = Max(endRowId, sliceBeginRowId + 1); // always load sliceBeginRowId regardless of key2
}
}
return true;
@@ -188,7 +163,7 @@ public:
}
};
- const auto tryHandleDataPage = [&](TChildState child) -> bool {
+ const auto tryHandleDataPage = [&](const TChildState& child) -> bool {
if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) {
const auto page = TryGetDataPage(child.PageId, { });
if (page) {
@@ -211,19 +186,18 @@ public:
}
};
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
+ for (ui32 height = 0; height < meta.LevelCount; height++) {
if (height == 0) {
- ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleNode(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleNode);
}
level.swap(nextLevel);
nextLevel.clear();
- }
-
- if (!ready) { // some index pages are missing, do not continue
- ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
- return {ready, false};
+ if (firstChild.PageId == Max<TPageId>()) { // first child is unloaded, consider all first's child rows are needed for next levels
+ firstChild.Items = firstChild.PrevItems;
+ firstChild.Bytes = firstChild.PrevBytes;
+ }
}
// flat index doesn't treat key placement within data page, so let's do the same
@@ -231,12 +205,12 @@ public:
overshot &= endRowId == sliceEndRowId;
if (meta.LevelCount == 0) {
- ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleDataPage(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleDataPage);
}
- ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
+ ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, firstChild, itemsLimit, bytesLimit); // precharge groups using the latest row bounds
return {ready, overshot};
}
@@ -244,17 +218,13 @@ public:
TResult DoReverse(TCells key1, TCells key2, TRowId endRowId, TRowId beginRowId,
const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
endRowId++; // current interface accepts inclusive row1 bound
- Y_ABORT_UNLESS(beginRowId < endRowId);
- bool ready = true, overshot = true;
- bool hasValidRowsRange = Groups || IncludeHistory; // false value means that beginRowId, endRowId are invalid and shouldn't be used
- ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups
- TRowId endBytesLimitRowId = Max<TRowId>(); // last unloaded probably needed row
-
+ bool ready = true, overshot = true, hasValidRowsRange = Groups || IncludeHistory;
+ const TRowId sliceBeginRowId = beginRowId, sliceEndRowId = endRowId;
const auto& meta = Part->IndexPages.BTreeGroups[0];
+ Y_ABORT_UNLESS(beginRowId < endRowId);
Y_ABORT_UNLESS(endRowId <= meta.RowCount);
- const TRowId sliceBeginRowId = beginRowId;
if (Y_UNLIKELY(key1 && key2 && Compare(key2, key1, keyDefaults) > 0)) {
key2 = key1; // will not go further than key1
hasValidRowsRange = false;
@@ -264,74 +234,47 @@ public:
TVector<TNodeState> level, nextLevel(::Reserve(3));
TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
- ui64 prevKey1Items = 0, key1Items = 0;
+ TChildState lastChild = BuildRootChildState(meta);
const auto iterateLevel = [&](const auto& tryHandleChild) {
// tryHandleChild may update them, copy for simplicity
- // always load endRowId - 1 regardless of keys
- const TRowId levelBeginRowId = Min(beginRowId, endRowId - 1), levelEndRowId = endRowId;
- const TChild *levelLastChild = nullptr, *levelPrevLastChild = nullptr;
+ const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId;
for (const auto &node : level) {
if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) {
continue;
}
-
- TRecIdx from = 0, to = node.GetChildrenCount();
+ TRecIdx from = 0, to = node.GetKeysCount();
if (node.BeginRowId < levelBeginRowId) {
from = node.Seek(levelBeginRowId);
}
- if (node.EndRowId > levelEndRowId) {
- to = node.Seek(levelEndRowId - 1) + 1;
+ if (node.EndRowId >= levelEndRowId) {
+ to = node.Seek(levelEndRowId - 1);
+ if (lastChild.PageId != Max<TPageId>()) { // still valid and should be updated
+ auto& child = node.GetChild(to);
+ auto prevChild = to ? node.GetChildRef(to - 1) : nullptr;
+ lastChild = BuildChildState(node, child, prevChild);
+ }
}
- for (TRecIdx posExt = to; posExt > from; posExt--) {
- auto child = node.GetChildRef(posExt - 1);
+
+ for (TRecIdx posExt = to + 1; posExt > from; posExt--) {
+ auto& child = node.GetChild(posExt - 1);
auto prevChild = posExt - 1 ? node.GetChildRef(posExt - 2) : nullptr;
- TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
- TRowId childEndRowId = child->RowCount;
- if (itemsLimit || bytesLimit) {
- if (!levelLastChild) {
- // do not apply limits on the last child because endRowId/key1 position is uncertain
- levelLastChild = child;
- } else {
- if (!levelPrevLastChild) {
- levelPrevLastChild = child;
- }
- if (itemsLimit) {
- ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child->GetNonErasedRowCount();
- if (LimitExceeded(items, itemsLimit)) {
- overshot = false;
- return;
- }
- }
- if (bytesLimit) {
- ui64 bytes = levelPrevLastChild->DataSize - child->DataSize;
- if (LimitExceeded(bytes, bytesLimit)) {
- beginRowId = Max(beginRowId, childEndRowId);
- overshot = false;
- return;
- }
- }
- }
+ auto childState = BuildChildState(node, child, prevChild);
+ if (LimitExceeded(childState.Items, lastChild.PrevItems, itemsLimit) || LimitExceeded(childState.Bytes, lastChild.PrevBytes, bytesLimit)) {
+ beginRowId = Max(beginRowId, childState.EndRowId);
+ return;
}
- ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId));
+ ready &= tryHandleChild(childState);
}
}
};
const auto skipUnloadedRows = [&](const TChildState& child) {
+ if (child.PageId == lastChild.PageId) {
+ lastChild.PageId = Max<TPageId>(); // mark last child unloaded
+ }
if (child.PageId == key1PageId) {
- if (hasValidRowsRange && chargeGroupsItemsLimit) {
- ui64 unloadedItems = key1Items - prevKey1Items;
- if (unloadedItems < chargeGroupsItemsLimit) {
- chargeGroupsItemsLimit -= unloadedItems;
- } else {
- hasValidRowsRange = false;
- }
- }
- if (hasValidRowsRange && bytesLimit) {
- endBytesLimitRowId = Min(endRowId, child.EndRowId);
- }
endRowId = Min(endRowId, child.BeginRowId);
}
if (child.PageId == key2PageId) {
@@ -339,19 +282,14 @@ public:
}
};
- const auto tryHandleNode = [&](TChildState child) -> bool {
- if (child.PageId == key1PageId || child.PageId == key2PageId) {
+ const auto tryHandleNode = [&](const TChildState& child) -> bool {
+ if (child.PageId == lastChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) {
if (TryLoadNode(child, nextLevel)) {
const auto& node = nextLevel.back();
if (child.PageId == key1PageId) {
TRecIdx pos = node.SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
auto& key1Child = node.GetChild(pos);
key1PageId = key1Child.PageId;
- key1Items = key1Child.GetNonErasedRowCount();
- if (pos) {
- auto& prevKey1Child = node.GetChild(pos - 1);
- prevKey1Items = prevKey1Child.GetNonErasedRowCount();
- }
endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1
}
if (child.PageId == key2PageId) {
@@ -360,8 +298,9 @@ public:
if (pos) {
auto& prevKey2Child = node.GetChild(pos - 1);
beginRowId = Max(beginRowId, prevKey2Child.RowCount - 1); // move beginRowId to the last key < key2
- if (prevKey2Child.RowCount >= endRowId) {
+ if (prevKey2Child.RowCount >= sliceEndRowId) {
hasValidRowsRange = false; // key2 is after current slice
+ beginRowId = Min(beginRowId, sliceEndRowId - 1); // always load endRowId - 1 regardless of keys
}
}
}
@@ -375,7 +314,7 @@ public:
}
};
- const auto tryHandleDataPage = [&](TChildState child) -> bool {
+ const auto tryHandleDataPage = [&](const TChildState& child) -> bool {
if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) {
const auto page = TryGetDataPage(child.PageId, { });
if (page) {
@@ -408,19 +347,18 @@ public:
}
};
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
+ for (ui32 height = 0; height < meta.LevelCount; height++) {
if (height == 0) {
- ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleNode(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleNode);
}
level.swap(nextLevel);
nextLevel.clear();
- }
-
- if (!ready) { // some index pages are missing, do not continue
- ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
- return {ready, false};
+ if (lastChild.PageId == Max<TPageId>()) { // last child is unloaded, consider all last's child rows are needed for next levels
+ lastChild.PrevItems = lastChild.Items;
+ lastChild.PrevBytes = lastChild.Bytes;
+ }
}
// flat index doesn't treat key placement within data page, so let's do the same
@@ -428,115 +366,122 @@ public:
overshot &= beginRowId == sliceBeginRowId;
if (meta.LevelCount == 0) {
- ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleDataPage(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleDataPage);
}
- ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
+ ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, lastChild, itemsLimit, bytesLimit); // precharge groups using the latest row bounds
return {ready, overshot};
}
private:
- bool DoGroupsAndHistory(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
+ bool DoGroupsAndHistory(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, const TChildState& firstChild, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool ready = true;
- if (hasValidRowsRange && beginRowId < endRowId) {
- if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) {
- endRowId = beginRowId + itemsLimit + 1;
- }
-
- if (beginBytesLimitRowId == Max<TRowId>()) {
- beginBytesLimitRowId = beginRowId;
+ if (!hasValidRowsRange) {
+ return ready;
+ }
+ if (beginRowId >= endRowId) {
+ return ready;
+ }
- if (IncludeHistory) {
- ready &= DoHistory(beginRowId, endRowId);
- } // otherwise beginBytesLimitRowId is specified, so we do not know where to start charging history
+ if (itemsLimit) {
+ // TODO: items limit should be applied on items not rows, but it requires iteration via first and last data pages
+ TRowId limitFromRowId = firstChild.PageId == Max<TPageId>() ? firstChild.BeginRowId : beginRowId;
+ if (endRowId - limitFromRowId - 1 > itemsLimit) {
+ endRowId = limitFromRowId + itemsLimit + 1;
}
-
- for (auto groupIndex : Groups) {
- ready &= DoGroup(TGroupId(groupIndex), beginRowId, endRowId, beginBytesLimitRowId, bytesLimit);
+ if (beginRowId >= endRowId) {
+ return ready;
}
}
+ if (IncludeHistory && (!bytesLimit || firstChild.PageId != Max<TPageId>())) {
+ ready &= DoHistory(beginRowId, endRowId);
+ }
+
+ for (auto groupIndex : Groups) {
+ ready &= DoGroup(TGroupId(groupIndex), beginRowId, endRowId, firstChild.BeginRowId, bytesLimit);
+ }
+
return ready;
}
- bool DoGroupsAndHistoryReverse(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
+ bool DoGroupsAndHistoryReverse(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, const TChildState& lastChild, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool ready = true;
- if (hasValidRowsRange && beginRowId < endRowId) {
- if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) {
- beginRowId = endRowId - itemsLimit - 1;
+ if (!hasValidRowsRange) {
+ return ready;
+ }
+ if (beginRowId >= endRowId) {
+ return ready;
+ }
+
+ if (itemsLimit) {
+ // TODO: items limit should be applied on items not rows, but it requires iteration via first and last data pages
+ TRowId limitToRowId = lastChild.PageId == Max<TPageId>() ? lastChild.EndRowId : endRowId;
+ if (limitToRowId - beginRowId - 1 >= itemsLimit) {
+ beginRowId = limitToRowId - itemsLimit - 1;
+ }
+ if (beginRowId >= endRowId) {
+ return ready;
}
- if (endBytesLimitRowId == Max<TRowId>()) {
- endBytesLimitRowId = endRowId;
+ }
-
- if (IncludeHistory) {
- ready &= DoHistory(beginRowId, endRowId);
- }
- } // otherwise endBytesLimitRowId is specified, so we do not know where to start charging history
+ if (IncludeHistory && (!bytesLimit || lastChild.PageId != Max<TPageId>())) {
+ ready &= DoHistory(beginRowId, endRowId);
+ }
- for (auto groupIndex : Groups) {
- ready &= DoGroupReverse(TGroupId(groupIndex), beginRowId, endRowId, endBytesLimitRowId, bytesLimit);
- }
+ for (auto groupIndex : Groups) {
+ ready &= DoGroupReverse(TGroupId(groupIndex), beginRowId, endRowId, lastChild.EndRowId, bytesLimit);
}
return ready;
}
private:
- bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 bytesLimit) const noexcept {
+ bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId firstChildBeginRowId, ui64 bytesLimit) const noexcept {
bool ready = true;
-
const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index];
TVector<TNodeState> level, nextLevel(::Reserve(3));
- ui64 prevBeginDataSize = 0;
- ui64 prevBeginBytesLimitDataSize = bytesLimit ? GetPrevDataSize(meta, beginBytesLimitRowId) : 0;
+ ui64 firstChildPrevBytes = bytesLimit ? GetPrevBytes(meta, firstChildBeginRowId) : 0;
const auto iterateLevel = [&](const auto& tryHandleChild) {
- ui64 prevChildDataSize = prevBeginDataSize;
for (const auto &node : level) {
TRecIdx from = 0, to = node.GetChildrenCount();
if (node.BeginRowId < beginRowId) {
from = node.Seek(beginRowId);
- if (from) {
- prevChildDataSize = prevBeginDataSize = node.GetShortChild(from - 1).DataSize;
- }
}
if (node.EndRowId > endRowId) {
to = node.Seek(endRowId - 1) + 1;
}
+
for (TRecIdx pos : xrange(from, to)) {
- auto child = node.GetShortChildRef(pos);
+ auto& child = node.GetShortChild(pos);
auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr;
- TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
- TRowId childEndRowId = child->RowCount;
- if (bytesLimit) {
- if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) {
- return;
- }
+ auto childState = BuildChildState(node, child, prevChild);
+ if (LimitExceeded(firstChildPrevBytes, childState.PrevBytes, bytesLimit)) {
+ return;
}
- ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId));
- prevChildDataSize = child->DataSize;
+ ready &= tryHandleChild(childState);
}
}
};
- const auto tryHandleNode = [&](TChildState child) -> bool {
+ const auto tryHandleNode = [&](const TChildState& child) -> bool {
return TryLoadNode(child, nextLevel);
};
- const auto tryHandleDataPage = [&](TChildState child) -> bool {
+ const auto tryHandleDataPage = [&](const TChildState& child) -> bool {
return HasDataPage(child.PageId, groupId);
};
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
+ for (ui32 height = 0; height < meta.LevelCount; height++) {
if (height == 0) {
- ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleNode(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleNode);
}
@@ -544,12 +489,8 @@ private:
nextLevel.clear();
}
- if (!ready) { // some index pages are missing, do not continue
- return ready;
- }
-
if (meta.LevelCount == 0) {
- ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleDataPage(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleDataPage);
}
@@ -557,50 +498,46 @@ private:
return ready;
}
- bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 bytesLimit) const noexcept {
+ bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId lastChildEndRowId, ui64 bytesLimit) const noexcept {
bool ready = true;
-
const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index];
// level's nodes is in reverse order
TVector<TNodeState> level, nextLevel(::Reserve(3));
- ui64 endBytesLimitDataSize = bytesLimit ? GetDataSize(meta, endBytesLimitRowId - 1) : 0;
+ ui64 lastChildBytes = bytesLimit ? GetBytes(meta, lastChildEndRowId - 1) : 0;
const auto iterateLevel = [&](const auto& tryHandleChild) {
for (const auto &node : level) {
- TRecIdx from = 0, to = node.GetChildrenCount();
+ TRecIdx from = 0, to = node.GetKeysCount();
if (node.BeginRowId < beginRowId) {
from = node.Seek(beginRowId);
}
if (node.EndRowId > endRowId) {
- to = node.Seek(endRowId - 1) + 1;
+ to = node.Seek(endRowId - 1);
}
- for (TRecIdx posExt = to; posExt > from; posExt--) {
- auto child = node.GetShortChildRef(posExt - 1);
+ for (TRecIdx posExt = to + 1; posExt > from; posExt--) {
+ auto& child = node.GetShortChild(posExt - 1);
auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr;
- TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
- TRowId childEndRowId = child->RowCount;
- if (bytesLimit) {
- if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) {
- return;
- }
+ auto childState = BuildChildState(node, child, prevChild);
+ if (LimitExceeded(childState.Bytes, lastChildBytes, bytesLimit)) {
+ return;
}
- ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId));
+ ready &= tryHandleChild(childState);
}
}
};
- const auto tryHandleNode = [&](TChildState child) -> bool {
+ const auto tryHandleNode = [&](const TChildState& child) -> bool {
return TryLoadNode(child, nextLevel);
};
- const auto tryHandleDataPage = [&](TChildState child) -> bool {
+ const auto tryHandleDataPage = [&](const TChildState& child) -> bool {
return HasDataPage(child.PageId, groupId);
};
- for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
+ for (ui32 height = 0; height < meta.LevelCount; height++) {
if (height == 0) {
- ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleNode(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleNode);
}
@@ -608,12 +545,8 @@ private:
nextLevel.clear();
}
- if (!ready) { // some index pages are missing, do not continue
- return ready;
- }
-
if (meta.LevelCount == 0) {
- ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleDataPage(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleDataPage);
}
@@ -645,11 +578,9 @@ private:
};
TCells key2{ key2Cells, 3 };
- // Directly use the history group scheme
+ // Directly use the history group scheme and key defaults with correct sort order
const auto& scheme = Part->Scheme->HistoryGroup;
Y_DEBUG_ABORT_UNLESS(scheme.ColsKeyIdx.size() == 3);
-
- // Directly use the history key defaults with correct sort order
const TKeyCellDefaults* keyDefaults = Part->Scheme->HistoryKeys.Get();
const TGroupId groupId(0, true);
@@ -670,9 +601,9 @@ private:
}
for (TRecIdx pos : xrange(from, to)) {
auto& child = node.GetShortChild(pos);
- TRowId childBeginRowId = pos ? node.GetShortChild(pos - 1).RowCount : node.BeginRowId;
- TRowId childEndRowId = child.RowCount;
- ready &= tryHandleChild(TChildState(child.PageId, childBeginRowId, childEndRowId));
+ auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr;
+ auto childState = BuildChildState(node, child, prevChild);
+ ready &= tryHandleChild(childState);
}
}
};
@@ -686,7 +617,7 @@ private:
}
};
- const auto tryHandleNode = [&](TChildState child) -> bool {
+ const auto tryHandleNode = [&](const TChildState& child) -> bool {
if (child.PageId == key1PageId || child.PageId == key2PageId) {
if (TryLoadNode(child, nextLevel)) {
const auto& node = nextLevel.back();
@@ -714,7 +645,7 @@ private:
}
};
- const auto tryHandleDataPage = [&](TChildState child) -> bool {
+ const auto tryHandleDataPage = [&](const TChildState& child) -> bool {
if (Groups && (child.PageId == key1PageId || child.PageId == key2PageId)) {
const auto page = TryGetDataPage(child.PageId, groupId);
if (page) {
@@ -739,7 +670,7 @@ private:
for (ui32 height = 0; height < meta.LevelCount; height++) {
if (height == 0) {
- ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleNode(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleNode);
}
@@ -748,7 +679,7 @@ private:
}
if (meta.LevelCount == 0) {
- ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
+ ready &= tryHandleDataPage(BuildRootChildState(meta));
} else {
iterateLevel(tryHandleDataPage);
}
@@ -771,7 +702,7 @@ private:
}
private:
- ui64 GetPrevDataSize(const TBtreeIndexMeta& meta, TRowId rowId) const {
+ ui64 GetPrevBytes(const TBtreeIndexMeta& meta, TRowId rowId) const {
TPageId pageId = meta.PageId;
ui64 result = 0;
@@ -791,7 +722,7 @@ private:
return result;
}
- ui64 GetDataSize(TBtreeIndexMeta meta, TRowId rowId) const {
+ ui64 GetBytes(TBtreeIndexMeta meta, TRowId rowId) const {
TPageId pageId = meta.PageId;
ui64 result = meta.DataSize;
@@ -845,8 +776,29 @@ private:
: (left.size() > right.size() ? -1 : 1);
}
- bool LimitExceeded(ui64 value, ui64 limit) const noexcept {
- return limit && value > limit;
+ TChildState BuildRootChildState(const TBtreeIndexMeta& meta) const noexcept {
+ return TChildState(meta.PageId,
+ 0, meta.RowCount,
+ 0, meta.GetNonErasedRowCount(),
+ 0, meta.DataSize);
+ }
+
+ TChildState BuildChildState(const TNodeState& parent, TChild child, const TChild* prevChild) const noexcept {
+ return TChildState(child.PageId,
+ prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount,
+ prevChild ? prevChild->GetNonErasedRowCount() : parent.PrevItems, child.GetNonErasedRowCount(),
+ prevChild ? prevChild->DataSize : parent.PrevBytes, child.DataSize);
+ }
+
+ TChildState BuildChildState(const TNodeState& parent, TShortChild child, const TShortChild* prevChild) const noexcept {
+ return TChildState(child.PageId,
+ prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount,
+ prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount,
+ prevChild ? prevChild->DataSize : parent.PrevBytes, child.DataSize);
+ }
+
+ bool LimitExceeded(ui64 prev, ui64 current, ui64 limit) const noexcept {
+ return limit && current > prev && current - prev > limit;
}
private:
diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
index 68b2433f468..4bb71c2a938 100644
--- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
+++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp
@@ -18,6 +18,10 @@ namespace {
struct TTouchEnv : public NTest::TTestEnv {
const TSharedData* TryGetPage(const TPart *part, TPageId pageId, TGroupId groupId) override
{
+ if (Sticky[groupId].contains(pageId)) {
+ Loaded[groupId].insert(pageId);
+ }
+
Touched[groupId].insert(pageId);
if (Loaded[groupId].contains(pageId)) {
return NTest::TTestEnv::TryGetPage(part, pageId, groupId);
@@ -32,12 +36,22 @@ namespace {
Touched.clear();
}
+ void StickLoaded() {
+ for (const auto &g : Loaded) {
+ Sticky[g.first].insert(g.second.begin(), g.second.end());
+ }
+ Touched.clear();
+ Loaded.clear();
+ }
+
TMap<TGroupId, TSet<TPageId>> Loaded;
TMap<TGroupId, TSet<TPageId>> Touched;
+ TMap<TGroupId, TSet<TPageId>> Sticky;
};
void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message,
bool allowAdditionalFirstLastPartPages = false, bool allowAdditionalFirstLoadedPage = false, bool allowLastLoadedPageDifference = false) {
+
TSet<TGroupId> groupIds;
for (const auto &c : {bTree.Loaded, flat.Loaded}) {
for (const auto &g : c) {
@@ -79,15 +93,16 @@ namespace {
}
}
- struct TMakePartParams {
+ struct TTestParams {
const ui32 Levels = Max<ui32>();
const bool Groups = false;
const bool History = false;
const bool Slices = false;
const ui32 Rows = 40;
+ const bool StickSomePages = false;
};
- TPartEggs MakePart(TMakePartParams params) {
+ TPartEggs MakePart(TTestParams params) {
NPage::TConf conf;
switch (params.Levels) {
case 0:
@@ -315,7 +330,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
void CheckSeekKey(const TPartStore& part, const TKeyCellDefaults *keyDefaults) {
- for (bool reverse : {false}) {
+ for (bool reverse : {false, true}) {
for (ESeek seek : {ESeek::Exact, ESeek::Lower, ESeek::Upper}) {
for (ui32 firstCell : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
for (ui32 secondCell : xrange<ui32>(0, 14)) {
@@ -370,7 +385,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
}
- void CheckPart(TMakePartParams params) {
+ void CheckPart(TTestParams params) {
TPartEggs eggs = MakePart(params);
const auto part = *eggs.Lone();
@@ -394,8 +409,23 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
+ void StickSomePages(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) {
+ if (params.StickSomePages) {
+ TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true);
+
+ for (int times = 0; times < 5; times++) {
+ bTree.ICharge::Do(10, 10, keyDefaults, 0, 0);
+ bTreeEnv.LoadTouched();
+ }
+
+ flatEnv.Loaded = bTreeEnv.Loaded;
+ flatEnv.StickLoaded();
+ bTreeEnv.StickLoaded();
+ }
+ }
+
void DoChargeRowId(ICharge& charge, TTouchEnv& env, const TRowId row1, const TRowId row2, ui64 itemsLimit, ui64 bytesLimit,
- bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) {
+ bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 15) {
while (true) {
bool ready = reverse
? charge.DoReverse(row2, row1, keyDefaults, itemsLimit, bytesLimit)
@@ -424,7 +454,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
Y_UNREACHABLE();
}
- void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
+ void CheckChargeRowId(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
for (bool reverse : {false, true}) {
for (ui64 itemsLimit : TVector<ui64>{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) {
for (TRowId rowId1 : xrange<TRowId>(0, part.Stat.Rows - 1)) {
@@ -432,6 +462,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
TTouchEnv bTreeEnv, flatEnv;
TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true);
TCharge flat(&flatEnv, part, tags, true);
+ StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv);
TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2 << " items " << itemsLimit;
DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, itemsLimit, 0, reverse, *keyDefaults, message);
@@ -444,7 +475,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
}
- void CheckChargeKeys(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
+ void CheckChargeKeys(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
for (bool reverse : {false, true}) {
for (ui64 itemsLimit : TVector<ui64>{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) {
for (ui32 firstCellKey1 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
@@ -457,6 +488,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
TTouchEnv bTreeEnv, flatEnv;
TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true);
TCharge flat(&flatEnv, part, tags, true);
+ StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv);
TStringBuilder message = TStringBuilder() << (reverse ? "ChargeKeysReverse " : "ChargeKeys ") << "(";
for (auto c : key1) {
@@ -487,7 +519,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
}
- void CheckChargeBytesLimit(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
+ void CheckChargeBytesLimit(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) {
for (bool reverse : {false, true}) {
for (ui64 bytesLimit : xrange<ui64>(1, part.Stat.Bytes + 100, part.Stat.Bytes / 100)) {
for (ui32 firstCellKey1 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
@@ -497,6 +529,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
TTouchEnv limitedEnv, unlimitedEnv;
TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true);
TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true);
+ StickSomePages(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv);
TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "(";
for (auto c : key1) {
@@ -533,7 +566,12 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
if (!groupId.IsMain() && !loaded.contains(pageId)) {
// only check that we loaded consecutive pages
- break;
+ if (params.StickSomePages) {
+ // extra pages may appear after the bytes limit is applied on main pages
+ continue;
+ } else {
+ break;
+ }
}
expected.insert(pageId);
if (size > bytesLimit) {
@@ -551,7 +589,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
}
}
- void CheckPart(TMakePartParams params) {
+ void CheckPart(TTestParams params) {
TPartEggs eggs = MakePart(params);
const auto part = *eggs.Lone();
@@ -560,9 +598,9 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
tags.push_back(c.Tag);
}
- CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get());
- CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get());
- CheckChargeBytesLimit(part, tags, eggs.Scheme->Keys.Get());
+ CheckChargeRowId(params, part, tags, eggs.Scheme->Keys.Get());
+ CheckChargeKeys(params, part, tags, eggs.Scheme->Keys.Get());
+ CheckChargeBytesLimit(params, part, tags, eggs.Scheme->Keys.Get());
}
Y_UNIT_TEST(NoNodes) {
@@ -609,9 +647,17 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
CheckPart({.Levels = 3, .History = true});
}
+ Y_UNIT_TEST(FewNodes_Sticky) {
+ CheckPart({.Levels = 3, .StickSomePages = true});
+ }
+
Y_UNIT_TEST(FewNodes_Groups_History) {
CheckPart({.Levels = 3, .Groups = true, .History = true});
}
+
+ Y_UNIT_TEST(FewNodes_Groups_History_Sticky) {
+ CheckPart({.Levels = 3, .Groups = true, .History = true, .StickSomePages = true});
+ }
}
Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
@@ -756,7 +802,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
}
}
- void CheckPart(TMakePartParams params) {
+ void CheckPart(TTestParams params) {
TPartEggs eggs = MakePart(params);
const auto part = *eggs.Lone();
@@ -816,6 +862,10 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
CheckPart({.Levels = 3, .History = true});
}
+ Y_UNIT_TEST(FewNodes_Sticky) {
+ CheckPart({.Levels = 3, .StickSomePages = true});
+ }
+
Y_UNIT_TEST(FewNodes_Slices) {
CheckPart({.Levels = 3, .Slices = true});
}
@@ -831,6 +881,10 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
Y_UNIT_TEST(FewNodes_Groups_History_Slices) {
CheckPart({.Levels = 3, .Groups = true, .History = true, .Slices = true});
}
+
+ Y_UNIT_TEST(FewNodes_Groups_History_Slices_Sticky) {
+ CheckPart({.Levels = 3, .Groups = true, .History = true, .Slices = true, .StickSomePages = true});
+ }
}
}