aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkungasc <kungasc@yandex-team.com>2023-11-30 19:05:46 +0300
committerkungasc <kungasc@yandex-team.com>2023-11-30 21:41:31 +0300
commit2f7d8b6e692bd7584a66686b5504381631dd4522 (patch)
treef21c802bc84eb153cd7f229639952c7d0c93e03b
parentae76055fae2835dc30e64116d2d6b76e1bdd026d (diff)
downloadydb-2f7d8b6e692bd7584a66686b5504381631dd4522.tar.gz
KIKIMR-19521 BTreeIndex Iter Multi
-rw-r--r--ydb/core/tablet_flat/CMakeLists.darwin-arm64.txt1
-rw-r--r--ydb/core/tablet_flat/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/tablet_flat/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/tablet_flat/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/tablet_flat/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/core/tablet_flat/flat_fwd_env.h16
-rw-r--r--ydb/core/tablet_flat/flat_page_btree_index.h14
-rw-r--r--ydb/core/tablet_flat/flat_part_btree_index_iter.h121
-rw-r--r--ydb/core/tablet_flat/flat_part_index_iter.h35
-rw-r--r--ydb/core/tablet_flat/flat_part_index_iter_create.cpp15
-rw-r--r--ydb/core/tablet_flat/flat_part_index_iter_iface.h32
-rw-r--r--ydb/core/tablet_flat/flat_part_iter_multi.h390
-rw-r--r--ydb/core/tablet_flat/test/libs/table/test_envs.h5
-rw-r--r--ydb/core/tablet_flat/ut/ut_btree_index.cpp87
-rw-r--r--ydb/core/tablet_flat/ut/ut_iterator.cpp3
-rw-r--r--ydb/core/tablet_flat/ut/ut_part.cpp13
-rw-r--r--ydb/core/tablet_flat/ut/ut_versions.cpp1
-rw-r--r--ydb/core/tablet_flat/ya.make1
18 files changed, 511 insertions, 227 deletions
diff --git a/ydb/core/tablet_flat/CMakeLists.darwin-arm64.txt b/ydb/core/tablet_flat/CMakeLists.darwin-arm64.txt
index 9f009108f0..68ffdc4aac 100644
--- a/ydb/core/tablet_flat/CMakeLists.darwin-arm64.txt
+++ b/ydb/core/tablet_flat/CMakeLists.darwin-arm64.txt
@@ -110,6 +110,7 @@ target_sources(ydb-core-tablet_flat PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_page_label.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_dump.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_iter_multi.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_loader.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_overlay.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_outset.cpp
diff --git a/ydb/core/tablet_flat/CMakeLists.darwin-x86_64.txt b/ydb/core/tablet_flat/CMakeLists.darwin-x86_64.txt
index 9f009108f0..68ffdc4aac 100644
--- a/ydb/core/tablet_flat/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/tablet_flat/CMakeLists.darwin-x86_64.txt
@@ -110,6 +110,7 @@ target_sources(ydb-core-tablet_flat PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_page_label.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_dump.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_iter_multi.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_loader.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_overlay.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_outset.cpp
diff --git a/ydb/core/tablet_flat/CMakeLists.linux-aarch64.txt b/ydb/core/tablet_flat/CMakeLists.linux-aarch64.txt
index 7621ab1675..11100537ff 100644
--- a/ydb/core/tablet_flat/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/tablet_flat/CMakeLists.linux-aarch64.txt
@@ -111,6 +111,7 @@ target_sources(ydb-core-tablet_flat PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_page_label.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_dump.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_iter_multi.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_loader.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_overlay.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_outset.cpp
diff --git a/ydb/core/tablet_flat/CMakeLists.linux-x86_64.txt b/ydb/core/tablet_flat/CMakeLists.linux-x86_64.txt
index 7621ab1675..11100537ff 100644
--- a/ydb/core/tablet_flat/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/tablet_flat/CMakeLists.linux-x86_64.txt
@@ -111,6 +111,7 @@ target_sources(ydb-core-tablet_flat PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_page_label.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_dump.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_iter_multi.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_loader.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_overlay.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_outset.cpp
diff --git a/ydb/core/tablet_flat/CMakeLists.windows-x86_64.txt b/ydb/core/tablet_flat/CMakeLists.windows-x86_64.txt
index 9f009108f0..68ffdc4aac 100644
--- a/ydb/core/tablet_flat/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/tablet_flat/CMakeLists.windows-x86_64.txt
@@ -110,6 +110,7 @@ target_sources(ydb-core-tablet_flat PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_page_label.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_dump.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_iter_multi.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_loader.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_overlay.cpp
${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_part_outset.cpp
diff --git a/ydb/core/tablet_flat/flat_fwd_env.h b/ydb/core/tablet_flat/flat_fwd_env.h
index ba2046c81d..7432617d30 100644
--- a/ydb/core/tablet_flat/flat_fwd_env.h
+++ b/ydb/core/tablet_flat/flat_fwd_env.h
@@ -113,8 +113,8 @@ namespace NFwd {
ui16 room = (groupId.Historic ? part->GroupsCount + 2 : 0) + groupId.Index;
TSlot slot = GetQueueSlot(part, room);
- if (part->GetPageType(ref, groupId) == EPage::Index) {
- return TryGetIndexPage(slot, ref);
+ if (auto type = part->GetPageType(ref, groupId); IsIndexPage(type)) {
+ return TryGetIndexPage(slot, ref, type);
}
return Handle(Queues.at(slot), ref).Page;
@@ -162,7 +162,7 @@ namespace NFwd {
TVector<NPageCollection::TLoadedPage> queuePages(Reserve(pages.size()));
for (auto& page : pages) {
const auto &meta = pageCollection->Page(page.PageId);
- if (NTable::EPage(meta.Type) == EPage::Index) {
+ if (IsIndexPage(NTable::EPage(meta.Type))) {
IndexPages.at(part).Pages[page.PageId] = page;
} else {
queuePages.push_back(page);
@@ -259,8 +259,14 @@ namespace NFwd {
}
private:
- const TSharedData* TryGetIndexPage(TSlot slot, TPageId pageId) noexcept
+ static bool IsIndexPage(EPage type) noexcept {
+ return type == EPage::Index || type == EPage::BTreeIndex;
+ }
+
+ const TSharedData* TryGetIndexPage(TSlot slot, TPageId pageId, EPage type) noexcept
{
+ Y_DEBUG_ABORT_UNLESS(IsIndexPage(type));
+
// TODO: count index pages in Stats later
auto &env = IndexPages.at(slot);
@@ -270,7 +276,7 @@ namespace NFwd {
return &pageIt->second.Data;
} else {
auto &queue = Queues.at(slot);
- queue.AddToQueue(pageId, EPage::Index);
+ queue.AddToQueue(pageId, type);
Queue.PushBack(&queue);
return nullptr;
}
diff --git a/ydb/core/tablet_flat/flat_page_btree_index.h b/ydb/core/tablet_flat/flat_page_btree_index.h
index cca8b0961f..51e4902d53 100644
--- a/ydb/core/tablet_flat/flat_page_btree_index.h
+++ b/ydb/core/tablet_flat/flat_page_btree_index.h
@@ -150,6 +150,20 @@ namespace NKikimr::NTable::NPage {
return result;
}
+ TCell At(TPos index)
+ {
+ Y_ABORT_UNLESS(Pos == 0, "Shouldn't be used");
+
+ // We may use offset = Columns[index].Offset - index * sizeof(TIndex::TItem) for fixed format
+ // But it looks too complicated because this method will be used only for history columns 0, 1, 2
+ Y_DEBUG_ABORT_UNLESS(index < 3);
+
+ for (TPos i = 0; i < index; i++) {
+ Next();
+ }
+ return Next();
+ }
+
int CompareTo(const TCells key, const TKeyCellDefaults *keyDefaults) noexcept
{
Y_ABORT_UNLESS(Pos == 0, "Shouldn't be used");
diff --git a/ydb/core/tablet_flat/flat_part_btree_index_iter.h b/ydb/core/tablet_flat/flat_part_btree_index_iter.h
index c6c993d0ae..58e5e2052a 100644
--- a/ydb/core/tablet_flat/flat_part_btree_index_iter.h
+++ b/ydb/core/tablet_flat/flat_part_btree_index_iter.h
@@ -3,11 +3,11 @@
#include "flat_part_iface.h"
#include "flat_page_index.h"
#include "flat_table_part.h"
-
+#include "flat_part_index_iter_iface.h"
namespace NKikimr::NTable {
-class TPartBtreeIndexIt {
+class TPartBtreeIndexIt : public IIndexIter {
using TCells = NPage::TCells;
using TBtreeIndexNode = NPage::TBtreeIndexNode;
using TGroupId = NPage::TGroupId;
@@ -33,7 +33,19 @@ class TPartBtreeIndexIt {
, EndRowId(endRowId)
, BeginKey(beginKey)
, EndKey(endKey)
- {
+ {
+ }
+
+ bool IsLastPos() const noexcept {
+ Y_ABORT_UNLESS(Node);
+ Y_ABORT_UNLESS(Pos);
+ return *Pos == Node->GetKeysCount();
+ }
+
+ bool IsFirstPos() const noexcept {
+ Y_ABORT_UNLESS(Node);
+ Y_ABORT_UNLESS(Pos);
+ return *Pos == 0;
}
};
@@ -103,13 +115,13 @@ public:
, Env(env)
, GroupId(groupId)
, GroupInfo(part->Scheme->GetLayout(groupId))
- , Meta(groupId.IsMain() ? part->IndexPages.BTreeGroups[groupId.Index] : part->IndexPages.BTreeHistoric[groupId.Index])
+ , Meta(groupId.IsHistoric() ? part->IndexPages.BTreeHistoric[groupId.Index] : part->IndexPages.BTreeGroups[groupId.Index])
{
const static TCellsIterable EmptyKey(static_cast<const char*>(nullptr), TColumns());
State.emplace_back(Meta, 0, GetEndRowId(), EmptyKey, EmptyKey);
}
- EReady Seek(TRowId rowId) {
+ EReady Seek(TRowId rowId) override {
if (rowId >= GetEndRowId()) {
return Exhaust();
}
@@ -122,7 +134,7 @@ public:
*
* Result is approximate and may be off by one page
*/
- EReady Seek(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) {
+ EReady Seek(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) override {
if (!key) {
// Special treatment for an empty key
switch (seek) {
@@ -142,7 +154,7 @@ public:
*
* Result is approximate and may be off by one page
*/
- EReady SeekReverse(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) {
+ EReady SeekReverse(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) override {
if (!key) {
// Special treatment for an empty key
switch (seek) {
@@ -157,39 +169,101 @@ public:
return DoSeek<TSeekKeyReverse>({seek, key, GroupInfo.ColsKeyIdx, keyDefaults});
}
- // EReady Next() {
- // Y_DEBUG_ABORT_UNLESS(IsLeaf());
- // return Exhaust();
- // }
+ EReady Next() override {
+ Y_ABORT_UNLESS(!IsExhausted());
- // EReady Prev() {
- // Y_DEBUG_ABORT_UNLESS(IsLeaf());
- // return Exhaust();
- // }
+ if (Meta.LevelsCount == 0) {
+ return Exhaust();
+ }
+
+ if (IsLeaf()) {
+ do {
+ State.pop_back();
+ } while (State.size() > 1 && State.back().IsLastPos());
+ if (State.back().IsLastPos()) {
+ return Exhaust();
+ }
+ PushNextState(*State.back().Pos + 1);
+ }
+
+ for (size_t level : xrange(State.size() - 1, Meta.LevelsCount)) {
+ if (!TryLoad(State[level])) {
+ // exiting with an intermediate state
+ Y_DEBUG_ABORT_UNLESS(!IsLeaf() && !IsExhausted());
+ return EReady::Page;
+ }
+ PushNextState(0);
+ }
+
+ // State.back() points to the target data page
+ Y_ABORT_UNLESS(IsLeaf());
+ return EReady::Data;
+ }
+
+ EReady Prev() override {
+ Y_ABORT_UNLESS(!IsExhausted());
+
+ if (Meta.LevelsCount == 0) {
+ return Exhaust();
+ }
+
+ if (IsLeaf()) {
+ do {
+ State.pop_back();
+ } while (State.size() > 1 && State.back().IsFirstPos());
+ if (State.back().IsFirstPos()) {
+ return Exhaust();
+ }
+ PushNextState(*State.back().Pos - 1);
+ }
+
+ for (size_t level : xrange(State.size() - 1, Meta.LevelsCount)) {
+ if (!TryLoad(State[level])) {
+ // exiting with an intermediate state
+ Y_DEBUG_ABORT_UNLESS(!IsLeaf() && !IsExhausted());
+ return EReady::Page;
+ }
+ PushNextState(State[level].Node->GetKeysCount());
+ }
+
+ // State.back() points to the target data page
+ Y_ABORT_UNLESS(IsLeaf());
+ return EReady::Data;
+ }
public:
- bool IsValid() const {
+ bool IsValid() const override {
Y_DEBUG_ABORT_UNLESS(IsLeaf() || IsExhausted());
return IsLeaf();
}
- TPageId GetPageId() const {
+ TRowId GetEndRowId() const override {
+ return Meta.Count;
+ }
+
+ TPageId GetPageId() const override {
Y_ABORT_UNLESS(IsLeaf());
return State.back().Meta.PageId;
}
- TRowId GetRowId() const {
+ TRowId GetRowId() const override {
Y_ABORT_UNLESS(IsLeaf());
return State.back().BeginRowId;
}
- TRowId GetNextRowId() const {
+ TRowId GetNextRowId() const override {
Y_ABORT_UNLESS(IsLeaf());
return State.back().EndRowId;
}
- TRowId GetEndRowId() const {
- return Meta.Count;
+ bool HasKeyCells() const override {
+ Y_ABORT_UNLESS(IsLeaf());
+ return State.back().BeginKey.Count();
+ }
+
+ TCell GetKeyCell(TPos index) const override {
+ Y_ABORT_UNLESS(IsLeaf());
+ return State.back().BeginKey.Iter().At(index);
}
private:
@@ -214,7 +288,7 @@ private:
}
auto pos = seek.Do(state);
- PushNextState(state, pos);
+ PushNextState(pos);
}
// State.back() points to the target data page
@@ -245,7 +319,8 @@ private:
return EReady::Gone;
}
- void PushNextState(TNodeState& current, TRecIdx pos) {
+ void PushNextState(TRecIdx pos) {
+ TNodeState& current = State.back();
Y_ABORT_UNLESS(pos < current.Node->GetChildrenCount(), "Should point to some child");
current.Pos.emplace(pos);
diff --git a/ydb/core/tablet_flat/flat_part_index_iter.h b/ydb/core/tablet_flat/flat_part_index_iter.h
index 99db6d1a6f..addfe16236 100644
--- a/ydb/core/tablet_flat/flat_part_index_iter.h
+++ b/ydb/core/tablet_flat/flat_part_index_iter.h
@@ -2,13 +2,14 @@
#include "flat_part_iface.h"
#include "flat_page_index.h"
+#include "flat_part_index_iter_iface.h"
#include "flat_table_part.h"
#include <ydb/library/yverify_stream/yverify_stream.h>
namespace NKikimr::NTable {
-class TPartIndexIt {
+class TPartIndexIt : public IIndexIter {
public:
using TCells = NPage::TCells;
using TRecord = NPage::TIndex::TRecord;
@@ -25,7 +26,7 @@ public:
, EndRowId(groupId.IsMain() && part->Stat.Rows ? part->Stat.Rows : Max<TRowId>())
{ }
- EReady Seek(TRowId rowId) {
+ EReady Seek(TRowId rowId) override {
auto index = TryGetIndex();
if (!index) {
return EReady::Page;
@@ -35,7 +36,7 @@ public:
return DataOrGone();
}
- EReady Seek(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) {
+ EReady Seek(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) override {
auto index = TryGetIndex();
if (!index) {
return EReady::Page;
@@ -45,7 +46,7 @@ public:
return DataOrGone();
}
- EReady SeekReverse(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) {
+ EReady SeekReverse(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) override {
auto index = TryGetIndex();
if (!index) {
return EReady::Page;
@@ -68,14 +69,14 @@ public:
return DataOrGone();
}
- EReady Next() {
+ EReady Next() override {
Y_DEBUG_ABORT_UNLESS(Index);
Y_DEBUG_ABORT_UNLESS(Iter);
Iter++;
return DataOrGone();
}
- EReady Prev() {
+ EReady Prev() override {
Y_DEBUG_ABORT_UNLESS(Index);
Y_DEBUG_ABORT_UNLESS(Iter);
if (Iter.Off() == 0) {
@@ -86,7 +87,7 @@ public:
return DataOrGone();
}
- bool IsValid() const {
+ bool IsValid() const override {
Y_DEBUG_ABORT_UNLESS(Index);
return bool(Iter);
}
@@ -97,23 +98,23 @@ public:
}
public:
- TRowId GetEndRowId() const {
+ TRowId GetEndRowId() const override {
return EndRowId;
}
- TPageId GetPageId() const {
+ TPageId GetPageId() const override {
Y_ABORT_UNLESS(Index);
Y_ABORT_UNLESS(Iter);
return Iter->GetPageId();
}
- TRowId GetRowId() const {
+ TRowId GetRowId() const override {
Y_ABORT_UNLESS(Index);
Y_ABORT_UNLESS(Iter);
return Iter->GetRowId();
}
- TRowId GetNextRowId() const {
+ TRowId GetNextRowId() const override {
Y_ABORT_UNLESS(Index);
Y_ABORT_UNLESS(Iter);
auto next = Iter + 1;
@@ -122,6 +123,18 @@ public:
: EndRowId;
}
+ bool HasKeyCells() const override {
+ Y_ABORT_UNLESS(Index);
+ Y_ABORT_UNLESS(Iter);
+ return true;
+ }
+
+ TCell GetKeyCell(TPos index) const override {
+ Y_ABORT_UNLESS(Index);
+ Y_ABORT_UNLESS(Iter);
+ return Iter.GetRecord()->Cell(GroupInfo.ColsKeyIdx[index]);
+ }
+
const TRecord * GetRecord() const {
Y_ABORT_UNLESS(Index);
Y_ABORT_UNLESS(Iter);
diff --git a/ydb/core/tablet_flat/flat_part_index_iter_create.cpp b/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
new file mode 100644
index 0000000000..65a19e1066
--- /dev/null
+++ b/ydb/core/tablet_flat/flat_part_index_iter_create.cpp
@@ -0,0 +1,15 @@
+#include "flat_part_index_iter.h"
+#include "flat_part_btree_index_iter.h"
+
+namespace NKikimr::NTable {
+
+THolder<IIndexIter> CreateIndexIter(const TPart* part, IPages* env, NPage::TGroupId groupId)
+{
+ if (groupId.Index < (groupId.IsHistoric() ? part->IndexPages.BTreeHistoric : part->IndexPages.BTreeGroups).size()) {
+ return MakeHolder<TPartBtreeIndexIt>(part, env, groupId);
+ } else {
+ return MakeHolder<TPartIndexIt>(part, env, groupId);
+ }
+}
+
+}
diff --git a/ydb/core/tablet_flat/flat_part_index_iter_iface.h b/ydb/core/tablet_flat/flat_part_index_iter_iface.h
new file mode 100644
index 0000000000..02da8c5773
--- /dev/null
+++ b/ydb/core/tablet_flat/flat_part_index_iter_iface.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "flat_page_base.h"
+#include "flat_part_iface.h"
+
+namespace NKikimr::NTable {
+
+ struct IIndexIter {
+ using TCells = NPage::TCells;
+
+ virtual EReady Seek(TRowId rowId) = 0;
+ virtual EReady Seek(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) = 0;
+ virtual EReady SeekReverse(ESeek seek, TCells key, const TKeyCellDefaults *keyDefaults) = 0;
+ virtual EReady Next() = 0;
+ virtual EReady Prev() = 0;
+
+ virtual bool IsValid() const = 0;
+
+ virtual TRowId GetEndRowId() const = 0;
+ virtual TPageId GetPageId() const = 0;
+ virtual TRowId GetRowId() const = 0;
+ virtual TRowId GetNextRowId() const = 0;
+
+ virtual bool HasKeyCells() const = 0;
+ virtual TCell GetKeyCell(TPos index) const = 0;
+
+ virtual ~IIndexIter() = default;
+ };
+
+ THolder<IIndexIter> CreateIndexIter(const TPart* part, IPages* env, NPage::TGroupId groupId);
+
+}
diff --git a/ydb/core/tablet_flat/flat_part_iter_multi.h b/ydb/core/tablet_flat/flat_part_iter_multi.h
index 8c2d592242..e714912634 100644
--- a/ydb/core/tablet_flat/flat_part_iter_multi.h
+++ b/ydb/core/tablet_flat/flat_part_iter_multi.h
@@ -1,13 +1,13 @@
#pragma once
#include "flat_part_iface.h"
+#include "flat_part_index_iter_iface.h"
#include "flat_table_part.h"
#include "flat_row_eggs.h"
#include "flat_row_state.h"
#include "flat_part_pinout.h"
#include "flat_part_slice.h"
#include "flat_table_committed.h"
-#include "flat_part_index_iter.h"
namespace NKikimr {
namespace NTable {
@@ -21,7 +21,8 @@ namespace NTable {
: Part(part)
, Env(env)
, GroupId(groupId)
- , Index(part, env, groupId)
+ , Index_(CreateIndexIter(part, env, groupId))
+ , Index(*Index_.Get())
{
}
@@ -96,7 +97,8 @@ namespace NTable {
TPageId PageId = Max<TPageId>();
- TPartIndexIt Index;
+ THolder<IIndexIter> Index_;
+ IIndexIter& Index;
NPage::TDataPage Page;
NPage::TDataPage::TIter Data;
};
@@ -104,23 +106,17 @@ namespace NTable {
/**
* Part group iterator that may be positioned on a specific key
*/
- class TPartGroupKeyIt
- : private TPartGroupRowIt
+ class TPartGroupKeyIt : private TPartGroupRowIt
{
public:
using TCells = NPage::TCells;
TPartGroupKeyIt(const TPart* part, IPages* env, NPage::TGroupId groupId)
: TPartGroupRowIt(part, env, groupId)
+ , BeginRowId(0)
+ , EndRowId(Index.GetEndRowId())
+ , RowId(Max<TRowId>())
{
- ClearBounds();
- }
-
- void ClearBounds() noexcept
- {
- BeginRowId = 0;
- EndRowId = Index.GetEndRowId();
- RowId = Max<TRowId>();
}
void SetBounds(TRowId beginRowId, TRowId endRowId) noexcept
@@ -132,8 +128,7 @@ namespace NTable {
RowId = Max<TRowId>();
}
- EReady Seek(
- const TCells key, ESeek seek,
+ EReady Seek(const TCells key, ESeek seek,
const TPartScheme::TGroupInfo& scheme, const TKeyCellDefaults* keyDefaults) noexcept
{
Y_DEBUG_ABORT_UNLESS(seek == ESeek::Exact || seek == ESeek::Lower || seek == ESeek::Upper,
@@ -203,15 +198,17 @@ namespace NTable {
RowId, BeginRowId);
if (RowId < EndRowId) {
- return Next();
+ if (auto ready = LoadRowData(); ready != EReady::Data) {
+ return Terminate(ready);
+ }
+ return EReady::Data;
}
}
return Exhausted();
}
- EReady SeekReverse(
- const TCells key, ESeek seek,
+ EReady SeekReverse(const TCells key, ESeek seek,
const TPartScheme::TGroupInfo& scheme, const TKeyCellDefaults* keyDefaults) noexcept
{
Y_DEBUG_ABORT_UNLESS(seek == ESeek::Exact || seek == ESeek::Lower || seek == ESeek::Upper,
@@ -272,7 +269,10 @@ namespace NTable {
"Unexpected prev page RowId=%lu (EndRowId=%lu)",
RowId, EndRowId);
if (RowId >= BeginRowId) {
- return Prev();
+ if (auto ready = LoadRowData(); ready != EReady::Data) {
+ return Terminate(ready);
+ }
+ return EReady::Data;
}
}
@@ -290,9 +290,11 @@ namespace NTable {
}
RowId = rowId;
- Data = { };
- return Next();
+ if (auto ready = LoadRowData(); ready != EReady::Data) {
+ return Terminate(ready);
+ }
+ return EReady::Data;
}
EReady SeekToStart() noexcept
@@ -300,27 +302,16 @@ namespace NTable {
return Seek(BeginRowId);
}
- EReady SeekReverse(TRowId rowId) noexcept
- {
- if (Y_UNLIKELY(rowId < BeginRowId || rowId >= EndRowId)) {
- return Exhausted();
- }
-
- if (auto ready = SeekIndex(rowId); ready != EReady::Data) {
- return Terminate(ready);
- }
-
- RowId = rowId;
- Data = { };
-
- return Prev();
- }
-
EReady SeekToEnd() noexcept
{
- return SeekReverse(EndRowId - 1);
+ return Seek(EndRowId - 1);
}
+ /**
+ * If has Data, goes to the next row
+ *
+ * If doesn't have Data, loads the current row
+ */
EReady Next() noexcept
{
if (Y_UNLIKELY(RowId == Max<TRowId>())) {
@@ -332,34 +323,42 @@ namespace NTable {
RowId, BeginRowId, EndRowId);
if (Data) {
- if (++RowId >= EndRowId) {
+ if (RowId + 1 >= EndRowId) {
return Exhausted();
}
- if (++Data) {
+ if (Data + 1) {
+ ++RowId;
+ ++Data;
return EReady::Data;
- } else {
- if (auto ready = Index.Next(); ready != EReady::Data) {
- return Terminate(ready);
- }
}
- }
- Y_DEBUG_ABORT_UNLESS(Index.IsValid() && Index.GetRowId() <= RowId,
- "Next called without a valid index record");
-
- if (!LoadPage(Index.GetPageId(), Index.GetRowId())) {
- return EReady::Page;
+ // Go to the next data page
+ // Keep Data in case we have page faulted on Index.Next and need to call it again
+ auto ready = Index.Next();
+ if (ready == EReady::Gone) {
+ return Exhausted();
+ } else if (ready == EReady::Page) {
+ return ready;
+ } else {
+ ++RowId;
+ Data = { };
+ }
}
- if (Data = Page->Begin() + (RowId - Page.BaseRow())) {
- return EReady::Data;
+ if (auto ready = LoadRowData(); ready != EReady::Gone) {
+ return ready;
}
Y_ABORT_UNLESS(Index.Next() == EReady::Gone, "Unexpected failure to seek in a non-final data page");
return Exhausted();
}
+ /**
+ * If has Data, goes to the prev row
+ *
+ * If doesn't have Data, loads the current row
+ */
EReady Prev() noexcept
{
if (Y_UNLIKELY(RowId == Max<TRowId>())) {
@@ -371,30 +370,31 @@ namespace NTable {
RowId, BeginRowId, EndRowId);
if (Data) {
- if (RowId-- <= BeginRowId) {
+ if (RowId <= BeginRowId) {
return Exhausted();
}
if (Data.Off() != 0) {
+ --RowId;
--Data;
return EReady::Data;
+ }
+
+ // Go to the prev data page
+ // Keep Data in case we have page faulted on Index.Prev and need to call it again
+ auto ready = Index.Prev();
+ if (ready == EReady::Gone) {
+ return Exhausted();
+ } else if (ready == EReady::Page) {
+ return ready;
} else {
+ --RowId;
Data = { };
- if (auto ready = Index.Prev(); ready != EReady::Data) {
- return Terminate(ready);
- }
}
}
- Y_DEBUG_ABORT_UNLESS(Index.IsValid() && Index.GetRowId() <= RowId,
- "Prev called without a valid index record");
-
- if (!LoadPage(Index.GetPageId(), Index.GetRowId())) {
- return EReady::Page;
- }
-
- if (Data = Page->Begin() + (RowId - Page.BaseRow())) {
- return EReady::Data;
+ if (auto ready = LoadRowData(); ready != EReady::Gone) {
+ return ready;
}
Y_ABORT("Unexpected failure to seek in a non-final data page");
@@ -426,6 +426,22 @@ namespace NTable {
return ready;
}
+ EReady LoadRowData() noexcept
+ {
+ Y_DEBUG_ABORT_UNLESS(Index.IsValid() && Index.GetRowId() <= RowId,
+ "Called without a valid index record");
+
+ if (!LoadPage(Index.GetPageId(), Index.GetRowId())) {
+ return EReady::Page;
+ }
+
+ if (Data = Page->Begin() + (RowId - Page.BaseRow())) {
+ return EReady::Data;
+ }
+
+ return EReady::Gone;
+ }
+
EReady SeekIndex(TRowId rowId) noexcept
{
auto ready = Index.Seek(rowId);
@@ -444,8 +460,7 @@ namespace NTable {
/**
* Part group iterator over history data
*/
- class TPartGroupHistoryIt
- : private TPartGroupRowIt
+ class TPartGroupHistoryIt : private TPartGroupRowIt
{
public:
using TCells = NPage::TCells;
@@ -454,8 +469,7 @@ namespace NTable {
: TPartGroupRowIt(part, env, NPage::TGroupId(0, /* historic */ true))
{ }
- EReady Seek(
- TRowId rowId, const TRowVersion& rowVersion) noexcept
+ EReady Seek(TRowId rowId, const TRowVersion& rowVersion) noexcept
{
Y_DEBUG_ABORT_UNLESS(rowId != Max<TRowId>());
@@ -476,15 +490,14 @@ namespace NTable {
Y_DEBUG_ABORT_UNLESS(scheme.ColsKeyIdx.size() == 3);
Y_DEBUG_ABORT_UNLESS(scheme.ColsKeyData.size() == 3);
- // Directly use the histroy key keyDefaults with correct sort order
+ // Directly use the history key keyDefaults with correct sort order
const TKeyCellDefaults* keyDefaults = Part->Scheme->HistoryKeys.Get();
// Helper for loading row id and row version from the index
auto checkIndex = [&]() -> bool {
- auto record = Index.GetRecord();
- RowId = record->Cell(scheme.ColsKeyIdx[0]).AsValue<TRowId>();
- RowVersion.Step = record->Cell(scheme.ColsKeyIdx[1]).AsValue<ui64>();
- RowVersion.TxId = record->Cell(scheme.ColsKeyIdx[2]).AsValue<ui64>();
+ RowId = Index.GetKeyCell(0).AsValue<TRowId>();
+ RowVersion.Step = Index.GetKeyCell(1).AsValue<ui64>();
+ RowVersion.TxId = Index.GetKeyCell(2).AsValue<ui64>();
return rowId == RowId;
};
@@ -496,6 +509,81 @@ namespace NTable {
return rowId == RowId;
};
+ // Returns { } when current page is not sufficient
+ auto seekDownToRowVersion = [&]() -> std::optional<EReady> {
+ Y_DEBUG_ABORT_UNLESS(rowId == RowId && Data);
+ Y_DEBUG_ABORT_UNLESS(rowVersion < RowVersion);
+
+ // If we previously returned EReady::Data, then we are
+ // potentially seeking to an older row version on the same
+ // page. We want to optimize for two cases:
+ //
+ // 1. When the next row is very close, we dont want to perform
+ // expensive index seeks, page reloads and binary searches
+ //
+ // 2. When the next row is far away, we still attempt to find
+ // it on the current page, otherwise fallback to a binary
+ // search over the whole group.
+ for (int linear = 4; linear >= 0; --linear) {
+ ++Data;
+
+ // Perform binary search on the last iteration
+ if (linear == 0 && Data) {
+ Data = Page.LookupKey(key, scheme, ESeek::Lower, keyDefaults);
+ }
+
+ if (!Data) {
+ // Row is not on current page, move to the next
+ switch (Index.Next()) {
+ case EReady::Page: {
+ // Fallback to full binary search (maybe we need some other index page)
+ RowId = Max<TRowId>();
+ return { };
+ };
+ case EReady::Gone: {
+ return Exhausted();
+ };
+ case EReady::Data: {
+ Y_DEBUG_ABORT_UNLESS(Index.HasKeyCells(), "Non-first page is expected to have key cells");
+ if (Index.HasKeyCells()) {
+ if (!checkIndex()) {
+ // First row for the next RowId
+ MaxVersion = TRowVersion::Max();
+ Y_DEBUG_ABORT_UNLESS(rowId < RowId);
+ return EReady::Gone;
+ }
+
+ // Next page has the same row id
+ // Save an estimate for MaxVersion
+ MaxVersion = rowVersion;
+ return { };
+ } else {
+ // Fallback to full binary search
+ RowId = Max<TRowId>();
+ return { };
+ }
+ };
+ };
+ }
+
+ if (!checkData()) {
+ // First row for the new RowId
+ MaxVersion = TRowVersion::Max();
+ Y_DEBUG_ABORT_UNLESS(rowId < RowId);
+ return EReady::Gone;
+ }
+
+ if (RowVersion <= rowVersion) {
+ // Save an estimate for MaxVersion
+ MaxVersion = rowVersion;
+ return EReady::Data;
+ }
+ }
+
+ // This lambda has to be terminated
+ Y_ABORT_UNLESS(false, "Data binary search bug");
+ };
+
// Special case when we already have data with the same row id
if (rowId == RowId && Data) {
// Check if the current row is correct.
@@ -507,60 +595,11 @@ namespace NTable {
// Check if we are descending below the current row
if (Y_LIKELY(rowVersion < RowVersion)) {
- // If we previously returned EReady::Data, then we are
- // potentially seeking to an older row version on the same
- // page. We want to optimize for two cases:
- //
- // 1. When the next row is very close, we dont want to perform
- // expensive index seeks, page reloads and binary searches
- //
- // 2. When the next row is far away, we still attempt to find
- // it on the current page, otherwise fallback to a binary
- // search over the whole group.
- for (int linear = 4; linear >= 0; --linear) {
- ++Data;
-
- // Perform binary search on the last iteration
- if (linear == 0 && Data) {
- Data = Page.LookupKey(key, scheme, ESeek::Lower, keyDefaults);
- }
-
- if (!Data) {
- // Row is not on current page, move to the next
- if (auto ready = Index.Next(); ready != EReady::Data) {
- return Terminate(ready);
- }
- if (!checkIndex()) {
- // First row for the next RowId
- MaxVersion = TRowVersion::Max();
- Y_DEBUG_ABORT_UNLESS(rowId < RowId);
- return EReady::Gone;
- }
-
- // Next page has the same row id
- break;
- }
-
- if (!checkData()) {
- // First row for the new RowId
- MaxVersion = TRowVersion::Max();
- Y_DEBUG_ABORT_UNLESS(rowId < RowId);
- return EReady::Gone;
- }
-
- if (RowVersion <= rowVersion) {
- // Save an estimate for MaxVersion
- MaxVersion = rowVersion;
- return EReady::Data;
- }
-
- // This loop may only terminate with a break
- Y_ABORT_UNLESS(linear > 0, "Data binary search bug");
+ if (std::optional<EReady> ready = seekDownToRowVersion(); ready.has_value()) {
+ return *ready;
}
-
- // Save an estimate for MaxVersion
- MaxVersion = rowVersion;
- } else {
+ Y_DEBUG_ABORT_UNLESS(!Data, "Shouldn't continue search with Data");
+ } else {
// High-level iterators never go up once descended to a
// particular version within a specific key. However a
// cached iterator may be reused by seeking to an earlier
@@ -587,27 +626,33 @@ namespace NTable {
return EReady::Data;
}
- // Full binary search
+ // Full index binary search
if (auto ready = Index.Seek(ESeek::Lower, key, keyDefaults); ready != EReady::Data) {
return Terminate(ready);
}
// We need exact match on rowId, bail on larger values
- TRowId indexRowId = Index.GetRecord()->Cell(scheme.ColsKeyIdx[0]).AsValue<TRowId>();
- if (rowId < indexRowId) {
- // We cannot compute MaxVersion anyway
- return Exhausted();
+ Y_DEBUG_ABORT_UNLESS(Index.GetRowId() == 0 || Index.HasKeyCells(), "Non-first page is expected to have key cells");
+ if (Index.HasKeyCells()) {
+ TRowId indexRowId = Index.GetKeyCell(0).AsValue<TRowId>();
+ if (rowId < indexRowId) {
+ // We cannot compute MaxVersion anyway as indexRowId row may be presented on the previous page
+ // and last existing version for rowId is unknown
+ return Exhausted();
+ }
+ } else {
+ // No information about the current index row
+ RowId = Max<TRowId>();
}
if (!LoadPage(Index.GetPageId(), Index.GetRowId())) {
// It's ok to repeat binary search on the next iteration,
// since page faults take a long time and optimizing it
// wouldn't be worth it.
- Data = { };
- RowId = Max<TRowId>();
- return EReady::Page;
+ return Terminate(EReady::Page);
}
+ // Full data binary search
if (Data = Page.LookupKey(key, scheme, ESeek::Lower, keyDefaults)) {
if (!checkData()) {
// First row for the next RowId
@@ -623,20 +668,29 @@ namespace NTable {
return EReady::Data;
}
+ // Our key might be on the next page as lookups are not exact
if (auto ready = Index.Next(); ready != EReady::Data) {
+ // TODO: do not drop current index pages in case of a page fault
+ // Will also repeat index binary search in case of a page fault on the next iteration
return Terminate(ready);
}
- if (!checkIndex()) {
- // First row for the nextRowId
- MaxVersion = TRowVersion::Max();
- Y_DEBUG_ABORT_UNLESS(rowId < RowId);
- return EReady::Gone;
- }
+ Y_DEBUG_ABORT_UNLESS(Index.HasKeyCells(), "Non-first page is expected to have key cells");
+ if (Y_LIKELY(Index.HasKeyCells())) {
+ if (!checkIndex()) {
+ // First row for the nextRowId
+ MaxVersion = TRowVersion::Max();
+ Y_DEBUG_ABORT_UNLESS(rowId < RowId);
+ return EReady::Gone;
+ }
- // The above binary search failed, but since we started with
- // an index search the first row must be the one we want.
- Y_ABORT_UNLESS(RowVersion <= rowVersion, "Index binary search bug");
+ // The above binary search failed, but since we started with
+ // an index search the first row must be the one we want.
+ Y_ABORT_UNLESS(RowVersion <= rowVersion, "Index binary search bug");
+ } else {
+ // No information about the current index row
+ RowId = Max<TRowId>();
+ }
if (!LoadPage(Index.GetPageId(), Index.GetRowId())) {
// We don't want to repeat binary search on the next
@@ -650,12 +704,29 @@ namespace NTable {
}
Data = Page->Begin();
+ Y_ABORT_UNLESS(Data);
- Y_ABORT_UNLESS(Data && checkData() && RowVersion <= rowVersion, "Index and Data are out of sync");
+ if (Index.HasKeyCells()) {
+ // Must have rowId as we have checked index
+ Y_ABORT_UNLESS(checkData() && RowVersion <= rowVersion, "Index and Data are out of sync");
- // Save an estimate for MaxVersion
- MaxVersion = rowVersion;
- return EReady::Data;
+ // Save an estimate for MaxVersion
+ MaxVersion = rowVersion;
+ return EReady::Data;
+ } else {
+ if (checkData()) {
+ Y_ABORT_UNLESS(RowVersion <= rowVersion, "Index and Data are out of sync");
+
+ // Save an estimate for MaxVersion
+ MaxVersion = rowVersion;
+ return EReady::Data;
+ } else {
+ // First row for the nextRowId
+ MaxVersion = TRowVersion::Max();
+ Y_DEBUG_ABORT_UNLESS(rowId < RowId);
+ return EReady::Gone;
+ }
+ }
}
using TPartGroupRowIt::IsValid;
@@ -729,11 +800,6 @@ namespace NTable {
}
}
- void ClearBounds() noexcept
- {
- Main.ClearBounds();
- }
-
void SetBounds(const TSlice& slice) noexcept
{
Main.SetBounds(slice.BeginRowId(), slice.EndRowId());
@@ -768,12 +834,6 @@ namespace NTable {
return Main.SeekToStart();
}
- EReady SeekReverse(TRowId rowId) noexcept
- {
- ClearKey();
- return Main.SeekReverse(rowId);
- }
-
EReady SeekToEnd() noexcept
{
ClearKey();
@@ -1014,8 +1074,7 @@ namespace NTable {
Y_UNREACHABLE();
}
- std::optional<TRowVersion> SkipToCommitted(
- NTable::ITransactionMapSimplePtr committedTransactions,
+ std::optional<TRowVersion> SkipToCommitted(NTable::ITransactionMapSimplePtr committedTransactions,
NTable::ITransactionObserverSimplePtr transactionObserver) noexcept
{
Y_DEBUG_ABORT_UNLESS(Main.IsValid(), "Attempt to use an invalid iterator");
@@ -1229,10 +1288,7 @@ namespace NTable {
Apply(row, pin, data, col);
}
- void Apply(
- TRowState& row,
- TPinout::TPin pin,
- const NPage::TDataPage::TRecord* data,
+ void Apply(TRowState& row, TPinout::TPin pin, const NPage::TDataPage::TRecord* data,
const TPartScheme::TColumn& info) const noexcept
{
auto op = data->GetCellOp(info);
@@ -1532,7 +1588,7 @@ namespace NTable {
ready = SeekToStart();
if (ready == EReady::Page) {
- // we haven't seeked start, will do it again later
+ // we haven't sought start, will do it again later
Current--;
UpdateCurrent();
}
@@ -1564,7 +1620,7 @@ namespace NTable {
ready = SeekToEnd();
if (ready == EReady::Page) {
- // we haven't seeked end, will do it again later
+ // we haven't sought end, will do it again later
Current++;
UpdateCurrent();
}
diff --git a/ydb/core/tablet_flat/test/libs/table/test_envs.h b/ydb/core/tablet_flat/test/libs/table/test_envs.h
index 3735d38a54..3ff8dd4977 100644
--- a/ydb/core/tablet_flat/test/libs/table/test_envs.h
+++ b/ydb/core/tablet_flat/test/libs/table/test_envs.h
@@ -103,7 +103,8 @@ namespace NTest {
const TSharedData* TryGetPage(const TPart* part, TPageId ref, TGroupId groupId) override
{
- auto pass = ShouldPass((const void*)part, ref | (ui64(groupId.Raw()) << 32), part->GetPageType(ref, groupId) == EPage::Index);
+ auto pass = ShouldPass((const void*)part, ref | (ui64(groupId.Raw()) << 32),
+ part->GetPageType(ref, groupId) == EPage::Index || part->GetPageType(ref, groupId) == EPage::BTreeIndex);
return pass ? TTestEnv::TryGetPage(part, ref, groupId) : nullptr;
}
@@ -255,7 +256,7 @@ namespace NTest {
Y_ABORT_UNLESS(groupId.Index < partStore->Store->GetGroupCount());
- if (part->GetPageType(pageId, groupId) == EPage::Index) {
+ if (part->GetPageType(pageId, groupId) == EPage::Index || part->GetPageType(pageId, groupId) == EPage::BTreeIndex) {
return partStore->Store->GetPage(groupId.Index, pageId);
} else {
ui32 room = (groupId.Historic ? partStore->Store->GetRoomCount() : 0) + groupId.Index;
diff --git a/ydb/core/tablet_flat/ut/ut_btree_index.cpp b/ydb/core/tablet_flat/ut/ut_btree_index.cpp
index be9f18ad41..e80a41b900 100644
--- a/ydb/core/tablet_flat/ut/ut_btree_index.cpp
+++ b/ydb/core/tablet_flat/ut/ut_btree_index.cpp
@@ -258,6 +258,9 @@ Y_UNIT_TEST_SUITE(TBtreeIndexNode) {
Dump(serialized, writer.GroupInfo);
CheckKeys(node, keys, writer.GroupInfo);
CheckChildren(node, children);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.GetKeyCellsIter(1, writer.GroupInfo.ColsKeyIdx).At(0).AsValue<ui32>(), 100);
+ UNIT_ASSERT_VALUES_EQUAL(node.GetKeyCellsIter(1, writer.GroupInfo.ColsKeyIdx).At(2).AsValue<bool>(), true);
}
Y_UNIT_TEST(Group) {
@@ -343,6 +346,10 @@ Y_UNIT_TEST_SUITE(TBtreeIndexNode) {
Dump(serialized, writer.GroupInfo);
CheckKeys(node, keys, writer.GroupInfo);
CheckChildren(node, children);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.GetKeyCellsIter(12, writer.GroupInfo.ColsKeyIdx).At(0).AsValue<TRowId>(), 12);
+ UNIT_ASSERT_VALUES_EQUAL(node.GetKeyCellsIter(12, writer.GroupInfo.ColsKeyIdx).At(1).AsValue<ui64>(), 120);
+ UNIT_ASSERT_VALUES_EQUAL(node.GetKeyCellsIter(12, writer.GroupInfo.ColsKeyIdx).At(2).AsValue<ui64>(), 1200);
}
Y_UNIT_TEST(OneKey) {
@@ -886,10 +893,9 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
}
- template<typename TIter>
- EReady SeekRowId(TIter& iter, TRowId rowId, const TString& message, ui32 failsAllowed = 10) {
+ EReady Retry(std::function<EReady()> action, const TString& message, ui32 failsAllowed = 10) {
while (true) {
- if (auto ready = iter.Seek(rowId); ready != EReady::Page) {
+ if (auto ready = action(); ready != EReady::Page) {
return ready;
}
UNIT_ASSERT_C(failsAllowed--, "Too many fails " + message);
@@ -898,22 +904,32 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
template<typename TIter>
+ EReady SeekRowId(TIter& iter, TRowId rowId, const TString& message, ui32 failsAllowed = 10) {
+ return Retry([&]() {
+ return iter.Seek(rowId);
+ }, message, failsAllowed);
+ }
+
+ template<typename TIter>
EReady SeekKey(TIter& iter, ESeek seek, bool reverse, TCells key, const TKeyCellDefaults *keyDefaults, const TString& message, ui32 failsAllowed = 10) {
- const auto doSeek = [&] () {
+ return Retry([&]() {
if (reverse) {
return iter.SeekReverse(seek, key, keyDefaults);
} else {
return iter.Seek(seek, key, keyDefaults);
}
- };
+ }, message, failsAllowed);
+ }
- while (true) {
- if (auto ready = doSeek(); ready != EReady::Page) {
- return ready;
+ template<typename TIter>
+ EReady NextPrev(TIter& iter, bool next, const TString& message, ui32 failsAllowed = 10) {
+ return Retry([&]() {
+ if (next) {
+ return iter.Next();
+ } else {
+ return iter.Prev();
}
- UNIT_ASSERT_C(failsAllowed--, "Too many fails " + message);
- }
- return EReady::Page;
+ }, message, failsAllowed);
}
template<typename TEnv>
@@ -946,10 +962,10 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
template<typename TEnv>
- void CheckSeekKey(const TPartStore& part, ui32 rows, const TKeyCellDefaults *keyDefaults) {
+ void CheckSeekKey(const TPartStore& part, const TKeyCellDefaults *keyDefaults) {
for (bool reverse : {false, true}) {
for (ESeek seek : {ESeek::Exact, ESeek::Lower, ESeek::Upper}) {
- for (ui32 keyId : xrange(0u, rows + 2)) {
+ for (ui32 keyId : xrange(0u, static_cast<ui32>(part.Stat.Rows) + 2)) {
TVector<TCell> key{TCell::Make(keyId / 7), TCell::Make(keyId % 7)};
while (true) {
@@ -977,6 +993,38 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
}
}
+ template<typename TEnv>
+ void CheckNextPrev(const TPartStore& part) {
+ for (bool next : {true, false}) {
+ for (TRowId rowId : xrange(part.Stat.Rows)) {
+ TEnv env;
+ TPartBtreeIndexIt bTree(&part, &env, { });
+ TPartIndexIt flat(&part, &env, { });
+
+ // checking initial seek:
+ {
+ TString message = TStringBuilder() << "CheckNext<" << typeid(TEnv).name() << "> " << rowId;
+ EReady bTreeReady = SeekRowId(bTree, rowId, message);
+ EReady flatReady = SeekRowId(flat, rowId, message);
+ UNIT_ASSERT_VALUES_EQUAL(bTreeReady, rowId < part.Stat.Rows ? EReady::Data : EReady::Gone);
+ AssertEqual(bTree, bTreeReady, flat, flatReady, message);
+ }
+
+ // checking next:
+ while (true)
+ {
+ TString message = TStringBuilder() << "CheckNext<" << typeid(TEnv).name() << "> " << rowId << " -> " << rowId;
+ EReady bTreeReady = NextPrev(bTree, next, message);
+ EReady flatReady = NextPrev(flat, next, message);
+ AssertEqual(bTree, bTreeReady, flat, flatReady, message);
+ if (flatReady == EReady::Gone) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
void CheckPart(TConf&& conf, ui32 rows, ui32 levels) {
TLayoutCook lay;
@@ -1002,8 +1050,17 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) {
CheckSeekRowId<TTestEnv>(part);
CheckSeekRowId<TTouchEnv>(part);
- CheckSeekKey<TTestEnv>(part, rows, eggs.Scheme->Keys.Get());
- CheckSeekKey<TTouchEnv>(part, rows, eggs.Scheme->Keys.Get());
+ CheckSeekKey<TTestEnv>(part, eggs.Scheme->Keys.Get());
+ CheckSeekKey<TTouchEnv>(part, eggs.Scheme->Keys.Get());
+ CheckNextPrev<TTestEnv>(part);
+ CheckNextPrev<TTouchEnv>(part);
+ }
+
+ Y_UNIT_TEST(Conf) {
+ NPage::TConf conf;
+
+ // to not accidentally turn this setting on in trunk
+ UNIT_ASSERT_VALUES_EQUAL(conf.WriteBTreeIndex, false);
}
Y_UNIT_TEST(NoNodes) {
diff --git a/ydb/core/tablet_flat/ut/ut_iterator.cpp b/ydb/core/tablet_flat/ut/ut_iterator.cpp
index 7a4e6b3c6f..459074b6cb 100644
--- a/ydb/core/tablet_flat/ut/ut_iterator.cpp
+++ b/ydb/core/tablet_flat/ut/ut_iterator.cpp
@@ -24,7 +24,8 @@ namespace {
NPage::TConf conf;
conf.Group(0).PageSize = page;
- conf.LargeEdge = 29; /* neet to cover external blob usage */
+ conf.Group(0).BTreeIndexNodeTargetSize = 128;
+ conf.LargeEdge = 29; /* need to cover external blob usage */
return conf;
}
diff --git a/ydb/core/tablet_flat/ut/ut_part.cpp b/ydb/core/tablet_flat/ut/ut_part.cpp
index cbb1b10283..3de1b3e3d9 100644
--- a/ydb/core/tablet_flat/ut/ut_part.cpp
+++ b/ydb/core/tablet_flat/ut/ut_part.cpp
@@ -23,6 +23,7 @@ namespace {
conf.Groups.resize(groups);
for (size_t group : xrange(groups)) {
conf.Group(group).IndexMin = 1024; /* Should cover index buffer grow code */
+ conf.Group(group).BTreeIndexNodeTargetSize = 512; /* Should cover up/down moves */
}
conf.SmallEdge = 19; /* Packed to page collection large cell values */
conf.LargeEdge = 29; /* Large values placed to single blobs */
@@ -416,7 +417,7 @@ Y_UNIT_TEST_SUITE(TPart) {
UNIT_ASSERT(one && one == part.Store->PageCollectionPagesCount(part.Store->GetOuterRoom()));
}
- { /*_ Enusre there is some rows with two cells with references */
+ { /*_ Ensure that there is some rows with two cells with references */
ui32 large = cWidth(Eggs0().Lone()->Large.Get(), 2);
ui32 small = cWidth(Eggs0().Lone()->Small.Get(), 2);
@@ -425,18 +426,24 @@ Y_UNIT_TEST_SUITE(TPart) {
UNIT_ASSERT_C(large > 10, "Eggs0 has trivial external blobs set");
}
- { /*_ Check that the last key matches in the index */
+ { /*_ Ensure that the last key matches in the index */
UNIT_ASSERT_VALUES_EQUAL(IndexTools::GetEndRowId(part), Mass0().Saved.Size());
const NPage::TCompare<NPage::TIndex::TRecord> cmp(part.Scheme->Groups[0].ColsKeyIdx, *Eggs0().Scheme->Keys);
auto lastKey = TRowTool(*Eggs0().Scheme).KeyCells(Mass0().Saved[Mass0().Saved.Size()-1]);
UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*IndexTools::GetLastRecord(part), lastKey), 0);
}
- { /*_ Check that part has correct number of slices */
+ { /*_ Ensure that part has correct number of slices */
UNIT_ASSERT_C(part.Slices, "Part was generated without slices");
UNIT_ASSERT_C(part.Slices->size() > 1, "Slice items " << +part.Slices->size());
}
+ { /*_ Ensure that B-Tree index has enough layers */
+ if (part.IndexPages.BTreeGroups.size()) {
+ UNIT_ASSERT_VALUES_EQUAL(part.IndexPages.BTreeGroups[0].LevelsCount, 3);
+ }
+ }
+
DumpPart(*Eggs0().Lone(), 1);
}
diff --git a/ydb/core/tablet_flat/ut/ut_versions.cpp b/ydb/core/tablet_flat/ut/ut_versions.cpp
index 6e628a7c40..e8d24107d6 100644
--- a/ydb/core/tablet_flat/ut/ut_versions.cpp
+++ b/ydb/core/tablet_flat/ut/ut_versions.cpp
@@ -25,6 +25,7 @@ namespace {
conf.Groups.resize(groups);
for (size_t group : xrange(groups)) {
conf.Group(group).IndexMin = 1024; /* Should cover index buffer grow code */
+ conf.Group(group).BTreeIndexNodeTargetSize = 128; /* Should cover up/down moves */
}
conf.SmallEdge = 19; /* Packed to page collection large cell values */
conf.LargeEdge = 29; /* Large values placed to single blobs */
diff --git a/ydb/core/tablet_flat/ya.make b/ydb/core/tablet_flat/ya.make
index 12d88cc87e..5738a4bf35 100644
--- a/ydb/core/tablet_flat/ya.make
+++ b/ydb/core/tablet_flat/ya.make
@@ -44,6 +44,7 @@ SRCS(
flat_page_label.cpp
flat_part_dump.cpp
flat_part_iter_multi.cpp
+ flat_part_index_iter_create.cpp
flat_part_loader.cpp
flat_part_overlay.cpp
flat_part_outset.cpp