diff options
author | kungasc <kungasc@yandex-team.com> | 2023-12-01 14:34:07 +0300 |
---|---|---|
committer | kungasc <kungasc@yandex-team.com> | 2023-12-01 16:30:55 +0300 |
commit | ebe7a9b148efe82f86ed8ee5efbc3c3d40ef5833 (patch) | |
tree | 569d5e3130380f753cef63e4d5ecc33a9789c17e | |
parent | e766ad595b8580e92d8c04f360d1847a1395201e (diff) | |
download | ydb-ebe7a9b148efe82f86ed8ee5efbc3c3d40ef5833.tar.gz |
KIKIMR-19521 BTreeIndex ShortChild
Экономия ~26% для групп и ~16% для истории
-rw-r--r-- | ydb/core/tablet_flat/flat_page_btree_index.h | 72 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_page_btree_index_writer.h | 47 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_part_loader.cpp | 4 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_part_writer.h | 5 | ||||
-rw-r--r-- | ydb/core/tablet_flat/test/libs/table/test_writer.h | 4 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/ut_btree_index.cpp | 37 |
6 files changed, 113 insertions, 56 deletions
diff --git a/ydb/core/tablet_flat/flat_page_btree_index.h b/ydb/core/tablet_flat/flat_page_btree_index.h index 51e4902d53..968fb5f9c7 100644 --- a/ydb/core/tablet_flat/flat_page_btree_index.h +++ b/ydb/core/tablet_flat/flat_page_btree_index.h @@ -53,7 +53,10 @@ namespace NKikimr::NTable::NPage { struct THeader { TRecIdx KeysCount; TPgSize KeysSize; - ui8 FixedKeySize; + ui8 IsShortChildFormat : 1; + ui8 FixedKeySize : 7; + + const static ui8 MaxFixedKeySize = 127; } Y_PACKED; static_assert(sizeof(THeader) == 9, "Invalid TBtreeIndexNode THeader size"); @@ -80,23 +83,36 @@ namespace NKikimr::NTable::NPage { static_assert(sizeof(TIsNullBitmap) == 1, "Invalid TBtreeIndexNode TIsNullBitmap size"); + struct TShortChild { + TPageId PageId; + TRowId Count; + ui64 DataSize; + + auto operator<=>(const TShortChild&) const = default; + } Y_PACKED; + + static_assert(sizeof(TShortChild) == 20, "Invalid TBtreeIndexNode TShortChild size"); + struct TChild { TPageId PageId; TRowId Count; - TRowId ErasedCount; ui64 DataSize; + TRowId ErasedCount; auto operator<=>(const TChild&) const = default; TString ToString() const noexcept { - // copy values to prevent 'reference binding to misaligned address' error - return TStringBuilder() << "PageId: " << TPageId(PageId) << " Count: " << TRowId(Count) << " Erased: " << TRowId(ErasedCount) << " DataSize: " << ui64(DataSize); + return TStringBuilder() << "PageId: " << PageId << " Count: " << Count << " DataSize: " << DataSize << " Erased: " << ErasedCount; } } Y_PACKED; static_assert(sizeof(TChild) == 28, "Invalid TBtreeIndexNode TChild size"); + static_assert(offsetof(TChild, PageId) == offsetof(TShortChild, PageId)); + static_assert(offsetof(TChild, Count) == offsetof(TShortChild, Count)); + static_assert(offsetof(TChild, DataSize) == offsetof(TShortChild, DataSize)); + #pragma pack(pop) struct TCellsIter { @@ -242,7 +258,7 @@ namespace NKikimr::NTable::NPage { offset += Header->KeysSize; Children = TDeref<const TChild>::At(Header, offset); - offset += (1 + Header->KeysCount) * sizeof(TChild); + offset += (1 + Header->KeysCount) * (Header->IsShortChildFormat ? sizeof(TShortChild) : sizeof(TChild)); Y_ABORT_UNLESS(offset == data.Page.size()); } @@ -254,7 +270,7 @@ namespace NKikimr::NTable::NPage { bool IsFixedFormat() const noexcept { - return Header->FixedKeySize != Max<ui8>(); + return Header->FixedKeySize != Header->MaxFixedKeySize; } TRecIdx GetKeysCount() const noexcept @@ -277,9 +293,23 @@ namespace NKikimr::NTable::NPage { return GetCells<TCellsIter>(pos, columns); } - const TChild& GetChild(TRecIdx pos) const noexcept + const TShortChild& GetShortChild(TRecIdx pos) const noexcept { - return Children[pos]; + if (Header->IsShortChildFormat) { + return *TDeref<const TShortChild>::At(Children, pos * sizeof(TShortChild)); + } else { + return *TDeref<const TShortChild>::At(Children, pos * sizeof(TChild)); + } + } + + TChild GetChild(TRecIdx pos) const noexcept + { + if (Header->IsShortChildFormat) { + const TShortChild* const shortChild = TDeref<const TShortChild>::At(Children, pos * sizeof(TShortChild)); + return { shortChild->PageId, shortChild->Count, shortChild->DataSize, 0 }; + } else { + return *TDeref<const TChild>::At(Children, pos * sizeof(TChild)); + } } static bool Has(TRowId rowId, TRowId beginRowId, TRowId endRowId) noexcept { @@ -294,27 +324,27 @@ namespace NKikimr::NTable::NPage { on = { }; } - const auto cmp = [](TRowId rowId, const TChild& child) { - return rowId < child.Count; + auto range = xrange(0u, childrenCount); + const auto cmp = [this](TRowId rowId, TPos pos) { + return rowId < GetShortChild(pos).Count; }; TRecIdx result; if (!on) { - // Use a full binary search - result = std::upper_bound(Children, Children + childrenCount, rowId, cmp) - Children; - } else if (Children[*on].Count <= rowId) { + // Will do a full binary search on full range + } else if (GetShortChild(*on).Count <= rowId) { // Try a short linear search first result = *on; for (int linear = 0; linear < 4; ++linear) { result++; Y_ABORT_UNLESS(result < childrenCount, "Should always seek some child"); - if (Children[result].Count > rowId) { + if (GetShortChild(result).Count > rowId) { return result; } } - // Binary search from the next record - result = std::upper_bound(Children + result + 1, Children + childrenCount, rowId, cmp) - Children; + // Will do a binary search from the next record + range = xrange(result + 1, childrenCount); } else { // Children[*on].Count > rowId // Try a short linear search first result = *on; @@ -322,16 +352,18 @@ namespace NKikimr::NTable::NPage { if (result == 0) { return 0; } - if (Children[result - 1].Count <= rowId) { + if (GetShortChild(result - 1).Count <= rowId) { return result; } result--; } - // Binary search up to current record - result = std::upper_bound(Children, Children + result, rowId, cmp) - Children; + // Will do a binary search up to current record + range = xrange(0u, result); } + result = *std::upper_bound(range.begin(), range.end(), rowId, cmp); + Y_ABORT_UNLESS(result < childrenCount, "Should always seek some child"); return result; } @@ -428,7 +460,7 @@ namespace NKikimr::NTable::NPage { const THeader* Header = nullptr; const void* Keys = nullptr; const TRecordsEntry* Offsets = nullptr; - const TChild* Children = nullptr; + const void* Children = nullptr; }; struct TBtreeIndexMeta : public TBtreeIndexNode::TChild { diff --git a/ydb/core/tablet_flat/flat_page_btree_index_writer.h b/ydb/core/tablet_flat/flat_page_btree_index_writer.h index fd855f18f9..0c7b084f05 100644 --- a/ydb/core/tablet_flat/flat_page_btree_index_writer.h +++ b/ydb/core/tablet_flat/flat_page_btree_index_writer.h @@ -8,6 +8,7 @@ namespace NKikimr::NTable::NPage { class TBtreeIndexNodeWriter { using THeader = TBtreeIndexNode::THeader; using TIsNullBitmap = TBtreeIndexNode::TIsNullBitmap; + using TShortChild = TBtreeIndexNode::TShortChild; using TChild = TBtreeIndexNode::TChild; public: @@ -17,20 +18,26 @@ namespace NKikimr::NTable::NPage { , GroupInfo(Scheme->GetLayout(groupId)) { if (GroupId.IsMain()) { - FixedKeySize = Max<ui8>(); + // TODO: some main groups without nulls and var-sized cells also may use fixed format + FixedKeySize = TBtreeIndexNode::THeader::MaxFixedKeySize; } else { FixedKeySize = 0; for (TPos pos : xrange(GroupInfo.KeyTypes.size())) { Y_ABORT_UNLESS(GroupInfo.ColsKeyIdx[pos].IsFixed); FixedKeySize += GroupInfo.ColsKeyIdx[pos].FixedSize; } - Y_ABORT_UNLESS(FixedKeySize < Max<ui8>(), "KeysSize is out of bounds"); + Y_ABORT_UNLESS(FixedKeySize < TBtreeIndexNode::THeader::MaxFixedKeySize, "FixedKeySize is out of bounds"); } } bool IsFixedFormat() const noexcept { - return FixedKeySize != Max<ui8>(); + return FixedKeySize != TBtreeIndexNode::THeader::MaxFixedKeySize; + } + + bool IsShortChildFormat() const noexcept + { + return !GroupId.IsMain(); } void AddKey(TCellsRef cells) { @@ -43,6 +50,7 @@ namespace NKikimr::NTable::NPage { } void AddChild(TChild child) { + Y_ABORT_UNLESS(child.ErasedCount == 0 || !IsShortChildFormat(), "Short format can't have ErasedCount"); Children.push_back(child); } @@ -95,6 +103,7 @@ namespace NKikimr::NTable::NPage { header.KeysCount = Keys.size(); Y_ABORT_UNLESS(KeysSize < Max<TPgSize>(), "KeysSize is out of bounds"); header.KeysSize = KeysSize; + header.IsShortChildFormat = IsShortChildFormat(); header.FixedKeySize = FixedKeySize; if (!IsFixedFormat()) { @@ -118,7 +127,10 @@ namespace NKikimr::NTable::NPage { Keys.clear(); KeysSize = 0; - PlaceVector(Children); + for (auto &child : Children) { + PlaceChild(child); + } + Children.clear(); Y_ABORT_UNLESS(Ptr == End); NSan::CheckMemIsInitialized(buf.data(), buf.size()); @@ -137,7 +149,7 @@ namespace NKikimr::NTable::NPage { sizeof(TLabel) + sizeof(THeader) + (IsFixedFormat() ? 0 : sizeof(TRecordsEntry) * keysCount) + keysSize + - sizeof(TChild) * (keysCount + 1); + (IsShortChildFormat() ? sizeof(TShortChild) : sizeof(TChild)) * (keysCount + 1); } size_t GetKeysCount() const { @@ -145,7 +157,10 @@ namespace NKikimr::NTable::NPage { } TPgSize CalcKeySizeWithMeta(TCellsRef cells) const noexcept { - return sizeof(TRecordsEntry) + CalcKeySize(cells) + sizeof(TChild); + return + sizeof(TRecordsEntry) + + CalcKeySize(cells) + + (IsShortChildFormat() ? sizeof(TShortChild) : sizeof(TChild)); } private: @@ -231,12 +246,13 @@ namespace NKikimr::NTable::NPage { std::copy(data.data(), data.data() + data.size(), Advance(data.size())); } - template<typename T> - void PlaceVector(TVector<T> &vector) noexcept + void PlaceChild(const TChild& child) noexcept { - auto *dst = reinterpret_cast<T*>(Advance(sizeof(T)*vector.size())); - std::copy(vector.begin(), vector.end(), dst); - vector.clear(); + if (IsShortChildFormat()) { + Place<TShortChild>() = TShortChild{child.PageId, child.Count, child.DataSize}; + } else { + Place<TChild>() = child; + } } template<typename T> @@ -277,6 +293,7 @@ namespace NKikimr::NTable::NPage { class TBtreeIndexBuilder { public: + using TShortChild = TBtreeIndexNode::TShortChild; using TChild = TBtreeIndexNode::TChild; private: @@ -357,11 +374,15 @@ namespace NKikimr::NTable::NPage { Levels[0].PushKey(Writer.SerializeKey(cells)); } + void AddShortChild(TShortChild child) { + AddChild(TChild{child.PageId, child.Count, child.DataSize, 0}); + } + void AddChild(TChild child) { // aggregate in order to perform search by row id from any leaf node child.Count = (ChildrenCount += child.Count); - child.ErasedCount = (ChildrenErasedCount += child.ErasedCount); child.DataSize = (ChildrenSize += child.DataSize); + child.ErasedCount = (ChildrenErasedCount += child.ErasedCount); Levels[0].PushChild(child); } @@ -441,7 +462,7 @@ namespace NKikimr::NTable::NPage { if (levelIndex + 1 == Levels.size()) { Levels.emplace_back(); } - Levels[levelIndex + 1].PushChild(TChild{pageId, lastChild.Count, lastChild.ErasedCount, lastChild.DataSize}); + Levels[levelIndex + 1].PushChild(TChild{pageId, lastChild.Count, lastChild.DataSize, lastChild.ErasedCount}); if (!last) { Levels[levelIndex + 1].PushKey(Levels[levelIndex].PopKey()); } diff --git a/ydb/core/tablet_flat/flat_part_loader.cpp b/ydb/core/tablet_flat/flat_part_loader.cpp index 3e636dcb4b..b6de80b949 100644 --- a/ydb/core/tablet_flat/flat_part_loader.cpp +++ b/ydb/core/tablet_flat/flat_part_loader.cpp @@ -83,8 +83,8 @@ void TLoader::StageParseMeta() noexcept NPage::TBtreeIndexMeta converted{{ meta.GetRootPageId(), meta.GetCount(), - meta.GetErasedCount(), - meta.GetDataSize()}, + meta.GetDataSize(), + meta.GetErasedCount()}, meta.GetLevelsCount(), meta.GetIndexSize()}; (history ? BTreeHistoricIndexes : BTreeGroupIndexes).push_back(converted); diff --git a/ydb/core/tablet_flat/flat_part_writer.h b/ydb/core/tablet_flat/flat_part_writer.h index 4b2e4723da..e35c2979b0 100644 --- a/ydb/core/tablet_flat/flat_part_writer.h +++ b/ydb/core/tablet_flat/flat_part_writer.h @@ -796,11 +796,10 @@ namespace NTable { g.BTreeIndex.AddKey(Key); } if (groupId.IsMain()) { - g.BTreeIndex.AddChild({page, dataPage->Count, Current.BTreeIndexErased, raw.size()}); + g.BTreeIndex.AddChild({page, dataPage->Count, raw.size(), Current.BTreeIndexErased}); Current.BTreeIndexErased = 0; } else { - // TODO: don't write erased for non-main groups - g.BTreeIndex.AddChild({page, dataPage->Count, 0, raw.size()}); + g.BTreeIndex.AddShortChild({page, dataPage->Count, raw.size()}); } g.BTreeIndex.Flush(Pager, false); } diff --git a/ydb/core/tablet_flat/test/libs/table/test_writer.h b/ydb/core/tablet_flat/test/libs/table/test_writer.h index cf1567b699..4a49364708 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_writer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_writer.h @@ -130,8 +130,8 @@ namespace NTest { NPage::TBtreeIndexMeta converted{{ meta.GetRootPageId(), meta.GetCount(), - meta.GetErasedCount(), - meta.GetDataSize()}, + meta.GetDataSize(), + meta.GetErasedCount()}, meta.GetLevelsCount(), meta.GetIndexSize()}; (history ? BTreeHistoricIndexes : BTreeGroupIndexes).push_back(converted); diff --git a/ydb/core/tablet_flat/ut/ut_btree_index.cpp b/ydb/core/tablet_flat/ut/ut_btree_index.cpp index e80a41b900..45d173db70 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index.cpp @@ -9,6 +9,7 @@ namespace NKikimr::NTable::NPage { namespace { using namespace NTest; + using TShortChild = TBtreeIndexNode::TShortChild; using TChild = TBtreeIndexNode::TChild; struct TTouchEnv : public NTest::TTestEnv { @@ -68,7 +69,7 @@ namespace { } TChild MakeChild(ui32 index) { - return TChild{index + 10000, index + 100, index + 30, index + 1000}; + return TChild{index + 10000, index + 100, index + 1000, index + 30}; } void Dump(TChild meta, const TPartScheme::TGroupInfo& groupInfo, const TStore& store, ui32 level = 0) noexcept @@ -154,6 +155,8 @@ namespace { UNIT_ASSERT_VALUES_EQUAL(node.GetKeysCount() + 1, children.size()); for (TRecIdx i : xrange(node.GetKeysCount() + 1)) { UNIT_ASSERT_EQUAL(node.GetChild(i), children[i]); + TShortChild shortChild{children[i].PageId, children[i].Count, children[i].DataSize}; + UNIT_ASSERT_EQUAL(node.GetShortChild(i), shortChild); } } } @@ -291,7 +294,8 @@ Y_UNIT_TEST_SUITE(TBtreeIndexNode) { TSerializedCellVec deserialized(k); writer.AddKey(deserialized.GetCells()); } - for (auto c : children) { + for (auto &c : children) { + c.ErasedCount = 0; writer.AddChild(c); } @@ -335,7 +339,8 @@ Y_UNIT_TEST_SUITE(TBtreeIndexNode) { TSerializedCellVec deserialized(k); writer.AddKey(deserialized.GetCells()); } - for (auto c : children) { + for (auto &c : children) { + c.ErasedCount = 0; writer.AddChild(c); } @@ -534,7 +539,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexBuilder) { Dump(*result, builder.GroupInfo, pager.Back()); - TBtreeIndexMeta expected{{0, 1155, 385, 11055}, 1, 595}; + TBtreeIndexMeta expected{{0, 1155, 11055, 385}, 1, 595}; UNIT_ASSERT_EQUAL_C(*result, expected, "Got " + result->ToString()); CheckKeys(result->PageId, keys, builder.GroupInfo, pager.Back()); @@ -621,8 +626,8 @@ Y_UNIT_TEST_SUITE(TBtreeIndexBuilder) { TBtreeIndexMeta expected{{9, 0, 0, 0}, 3, 1550}; for (auto c : children) { expected.Count += c.Count; - expected.ErasedCount += c.ErasedCount; expected.DataSize += c.DataSize; + expected.ErasedCount += c.ErasedCount; } UNIT_ASSERT_EQUAL_C(*result, expected, "Got " + result->ToString()); } @@ -657,7 +662,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexBuilder) { Dump(*result, builder.GroupInfo, pager.Back()); - TBtreeIndexMeta expected{{15, 15150, 8080, 106050}, 3, 10270}; + TBtreeIndexMeta expected{{15, 15150, 106050, 8080}, 3, 10270}; UNIT_ASSERT_EQUAL_C(*result, expected, "Got " + result->ToString()); } @@ -691,7 +696,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 1); - TBtreeIndexMeta expected{{0 /*Data page*/, 5, 0, 5240}, 0, 0}; + TBtreeIndexMeta expected{{0 /*Data page*/, 5, 5240, 0}, 0, 0}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected, "Got " + part->IndexPages.BTreeGroups[0].ToString()); } @@ -721,7 +726,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 2); - TBtreeIndexMeta expected{{3, 10, 0, 10480}, 1, 1115}; + TBtreeIndexMeta expected{{3, 10, 10480, 0}, 1, 1115}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected, "Got " + part->IndexPages.BTreeGroups[0].ToString()); } @@ -754,7 +759,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 117); - TBtreeIndexMeta expected{{143, 700, 0, 733140}, 3, 86036}; + TBtreeIndexMeta expected{{143, 700, 733140, 0}, 3, 86036}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected, "Got " + part->IndexPages.BTreeGroups[0].ToString()); } @@ -788,7 +793,7 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 31); - TBtreeIndexMeta expected{{37, 1000, 143, 22098}, 2, 1380}; + TBtreeIndexMeta expected{{37, 1000, 22098, 143}, 2, 1380}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected, "Got " + part->IndexPages.BTreeGroups[0].ToString()); } @@ -822,10 +827,10 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 334); - TBtreeIndexMeta expected0{{438, 1000, 0, 16680}, 3, 15246}; + TBtreeIndexMeta expected0{{438, 1000, 16680, 0}, 3, 15246}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected0, "Got " + part->IndexPages.BTreeGroups[0].ToString()); - TBtreeIndexMeta expected1{{441, 1000, 0, 21890}, 3, 8817}; + TBtreeIndexMeta expected1{{441, 1000, 21890, 0}, 3, 6497}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[1], expected1, "Got " + part->IndexPages.BTreeGroups[1].ToString()); } @@ -861,16 +866,16 @@ Y_UNIT_TEST_SUITE(TBtreeIndexTPart) { auto pages = IndexTools::CountMainPages(*part); UNIT_ASSERT_VALUES_EQUAL(pages, 334); - TBtreeIndexMeta expected0{{1315, 1000, 0, 32680}, 3, 15246}; + TBtreeIndexMeta expected0{{1315, 1000, 32680, 0}, 3, 15246}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[0], expected0, "Got " + part->IndexPages.BTreeGroups[0].ToString()); - TBtreeIndexMeta expected1{{1318, 1000, 0, 22889}, 3, 8817}; + TBtreeIndexMeta expected1{{1318, 1000, 22889, 0}, 3, 6497}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeGroups[1], expected1, "Got " + part->IndexPages.BTreeGroups[1].ToString()); - TBtreeIndexMeta expectedHist0{{1322, 2000, 0, 77340}, 4, 40617}; + TBtreeIndexMeta expectedHist0{{1322, 2000, 77340, 0}, 4, 34225}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeHistoric[0], expectedHist0, "Got " + part->IndexPages.BTreeHistoric[0].ToString()); - TBtreeIndexMeta expectedHist1{{1325, 2000, 0, 45780}, 3, 17662}; + TBtreeIndexMeta expectedHist1{{1325, 2000, 45780, 0}, 3, 13014}; UNIT_ASSERT_EQUAL_C(part->IndexPages.BTreeHistoric[1], expectedHist1, "Got " + part->IndexPages.BTreeHistoric[1].ToString()); } } |