diff options
author | kungasc <kungasc@yandex-team.com> | 2023-10-13 13:04:20 +0300 |
---|---|---|
committer | kungasc <kungasc@yandex-team.com> | 2023-10-13 14:04:34 +0300 |
commit | cf4d301c44398f63b387618472918acfff0dc87c (patch) | |
tree | 5c69dc325ea0a39d4ff2f2ffdc52ff0faaff57af | |
parent | 7ee659757df24a301c15da203481cd5b60e0bf67 (diff) | |
download | ydb-cf4d301c44398f63b387618472918acfff0dc87c.tar.gz |
KIKIMR-19521 BTreeIndex Node
-rw-r--r-- | ydb/core/tablet_flat/flat_page_btree_index.h | 180 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_page_btree_index_writer.h | 203 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_page_iface.h | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_part_scheme.h | 2 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/ut_btree_index.cpp | 305 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/ya.make | 1 |
10 files changed, 695 insertions, 1 deletions
diff --git a/ydb/core/tablet_flat/flat_page_btree_index.h b/ydb/core/tablet_flat/flat_page_btree_index.h new file mode 100644 index 0000000000..749a6bab86 --- /dev/null +++ b/ydb/core/tablet_flat/flat_page_btree_index.h @@ -0,0 +1,180 @@ +#pragma once + +#include <ydb/core/base/defs.h> +#include <util/generic/bitmap.h> +#include "flat_page_base.h" +#include "flat_page_label.h" + +namespace NKikimr::NTable::NPage { + + /* + TKey binary layout + .---------.---------------. + | Non-null cell bitmap | for all schema key columns + .---------.---------------. -. + | value OR offs | col_1 | + .---------.---------------. | + | . . . | | for each non-null column + .---------.---------------. | + | value OR offs | col_K | + .-.------.--.-------.-----. -' + | | | | | | var-size values + '-'------'--'-------'-----' + + TBtreeIndexNode page binary layout + - TLabel - page label + - THeader - page header + - TKey[N] - keys data <-- var-size + - TPgSize[N] - keys offsets + - TChild[N+1] - children + */ + + class TBtreeIndexNode { + public: + using TColumns = TArrayRef<const TPartScheme::TColumn>; + +#pragma pack(push,1) + public: + struct THeader { + TRecIdx KeysCount; + TPgSize KeysSize; + } Y_PACKED; + + static_assert(sizeof(THeader) == 8, "Invalid TBtreeIndexNode THeader size"); + + struct TKey { + struct TCellsIter { + TCellsIter(const TKey* key, TColumns columns) + : Key(key) + , Columns(columns) + , Pos(0) + { + Ptr = (char*)Key + key->NullBitmapLength(columns.size()); + } + + TPos Count() + { + return Columns.size(); + } + + TCell Next() + { + Y_ABORT_UNLESS(Pos < Columns.size()); + + if (Key->IsNull(Pos)) { + Pos++; + return { }; + } + + TCell result; + + if (Columns[Pos].IsFixed) { + result = TCell(Ptr, Columns[Pos].FixedSize); + } else { + auto *ref = TDeref<TDataRef>::At(Ptr); + result = TCell(TDeref<const char>::At(Ptr, ref->Offset), ref->Size); + } + Ptr += Columns[Pos].FixedSize; // fixed data or data ref size + Pos++; + + return result; + } + + private: + const TKey* const Key; + const TColumns Columns; + TPos Pos; + const char* Ptr; + }; + + static TPos NullBitmapLength(TPos capacity) { + // round up (capacity / 8) + return (capacity + 7) >> 3; + } + + bool IsNull(TPos pos) const { + ui8 x = IsNullBitmap[pos >> 3]; + return (x >> (pos & 7)) & 1; + } + + void SetNull(TPos pos) { + ui8& x = IsNullBitmap[pos >> 3]; + x |= (1 << (pos & 7)); + } + + // 1 = null + ui8 IsNullBitmap[0]; + } Y_PACKED; + + static_assert(sizeof(TKey) == 0, "Invalid TBtreeIndexNode TKey size"); + + struct TChild { + TPageId PageId; + TRowId Count; + TRowId ErasedCount; + ui64 Size; + + auto operator<=>(const TChild&) const = default; + + TString ToString() const noexcept + { + return TStringBuilder() << "PageId: " << PageId << " Count: " << Count << " Size: " << Size; + } + } Y_PACKED; + + static_assert(sizeof(TChild) == 28, "Invalid TBtreeIndexNode TChild size"); + +#pragma pack(pop) + + public: + TBtreeIndexNode(TSharedData raw) + : Raw(std::move(raw)) + { + const auto data = NPage::TLabelWrapper().Read(Raw, EPage::BTreeIndex); + Y_ABORT_UNLESS(data == ECodec::Plain && data.Version == 0); + + auto *header = TDeref<const THeader>::At(data.Page.data()); + Keys.Count = header->KeysCount; + size_t offset = sizeof(THeader); + + Keys.Base = Raw.data(); + offset += header->KeysSize; + + Keys.Offsets = TDeref<const TRecordsEntry>::At(header, offset); + offset += Keys.Count * sizeof(TRecordsEntry); + + Children = TDeref<const TChild>::At(header, offset); + offset += (1 + Keys.Count) * sizeof(TChild); + + Y_ABORT_UNLESS(offset == data.Page.size()); + } + + NPage::TLabel Label() const noexcept + { + return ReadUnaligned<NPage::TLabel>(Raw.data()); + } + + TRecIdx GetKeysCount() const noexcept + { + return Keys.Count; + } + + TKey::TCellsIter GetKeyCells(TRecIdx pos, TColumns columns) const noexcept + { + return TKey::TCellsIter(Keys.Record(pos), columns); + } + + const TChild& GetChild(TPos pos) const noexcept + { + return Children[pos]; + } + + // TODO: Seek methods will go here + + private: + TSharedData Raw; + TBlockWithRecords<TKey> Keys; + const TChild* Children; + }; + +} diff --git a/ydb/core/tablet_flat/flat_page_btree_index_writer.h b/ydb/core/tablet_flat/flat_page_btree_index_writer.h new file mode 100644 index 0000000000..da13588152 --- /dev/null +++ b/ydb/core/tablet_flat/flat_page_btree_index_writer.h @@ -0,0 +1,203 @@ +#pragma once + +#include "flat_page_btree_index.h" + +namespace NKikimr::NTable::NPage { + + class TBtreeIndexNodeWriter { + using THeader = TBtreeIndexNode::THeader; + using TKey = TBtreeIndexNode::TKey; + using TChild = TBtreeIndexNode::TChild; + + public: + TBtreeIndexNodeWriter(TIntrusiveConstPtr<TPartScheme> scheme, TGroupId groupId) + : Scheme(std::move(scheme)) + , GroupId(groupId) + , GroupInfo(Scheme->GetLayout(groupId)) + { + } + + // TODO: pass serialized key + size from TBtreeIndexBuilder + void AddKey(TString serializedKey) { + TSerializedCellVec key(serializedKey); + KeysSize += CalcSize(key.GetCells()); + // TODO: serialize to page bytes directly + Keys.push_back(serializedKey); + } + + void AddChild(TChild child) { + Children.push_back(child); + } + + TSharedData Finish() { + Y_ABORT_UNLESS(Keys.size()); + Y_ABORT_UNLESS(Children.size() == Keys.size() + 1); + + size_t childSize = sizeof(TChild); + size_t pageSize = + sizeof(TLabel) + sizeof(THeader) + + KeysSize + + sizeof(TRecordsEntry) * Keys.size() + + childSize * Children.size(); + + TSharedData buf = TSharedData::Uninitialized(pageSize); + Ptr = buf.mutable_begin(); + End = buf.end(); + + WriteUnaligned<TLabel>(Advance(Ptr, sizeof(TLabel)), TLabel::Encode(EPage::BTreeIndex, 0, pageSize)); + + auto &header = Place<THeader>(); + header.KeysCount = Keys.size(); + Y_ABORT_UNLESS(KeysSize < Max<TPgSize>(), "KeysSize is out of bounds"); + header.KeysSize = KeysSize; + + char* keyOffsetPtr = Ptr + KeysSize; + for (const auto &key : Keys) { + auto &meta = Place<TRecordsEntry>(keyOffsetPtr); + size_t offset = Ptr - buf.mutable_begin(); + Y_ABORT_UNLESS(offset < Max<TPgSize>(), "Key offset is out of bounds"); + meta.Offset = offset; + + TSerializedCellVec cells(key); + PlaceKey(cells.GetCells()); + } + Y_ABORT_UNLESS(Ptr == buf.mutable_begin() + sizeof(TLabel) + sizeof(THeader) + KeysSize); + Ptr = keyOffsetPtr; + Keys.clear(); + KeysSize = 0; + + PlaceVector(Children); + + Y_ABORT_UNLESS(Ptr == End); + NSan::CheckMemIsInitialized(buf.data(), buf.size()); + Ptr = 0; + End = 0; + + return buf; + }; + + private: + TPgSize CalcSize(TCellsRef key) const noexcept + { + Y_ABORT_UNLESS(key.size() <= GroupInfo.KeyTypes.size()); + + TPgSize size = TKey::NullBitmapLength(GroupInfo.ColsKeyIdx.size()); + + for (TPos pos : xrange(key.size())) { + if (const auto &val = key[pos]) { + size += GroupInfo.ColsKeyIdx[pos].FixedSize; // fixed data or data ref + if (!GroupInfo.ColsKeyIdx[pos].IsFixed) { + size += key[pos].Size(); + } + } + } + + return size; + } + + void PlaceKey(TCellsRef cells) + { + const TPos count = GroupInfo.ColsKeyIdx.size(); + Y_ABORT_UNLESS(cells.size() <= count); + + auto *key = TDeref<TKey>::At(Ptr); + + // mark all cells as non-null + Zero(key->NullBitmapLength(count)); + + auto* keyCellsPtr = Ptr; + + for (TPos pos : xrange(cells.size())) { + if (const auto &val = cells[pos]) { + const auto &info = GroupInfo.ColsKeyIdx[pos]; + Zero(info.FixedSize); + } else { + key->SetNull(pos); + } + } + for (TPos pos : xrange(cells.size(), GroupInfo.ColsKeyIdx.size())) { + key->SetNull(pos); + } + + for (TPos pos : xrange(cells.size())) { + if (const auto &cell = cells[pos]) { + Y_DEBUG_ABORT_UNLESS(!key->IsNull(pos)); + const auto &info = GroupInfo.ColsKeyIdx[pos]; + PlaceCell(info, cell, keyCellsPtr); + keyCellsPtr += info.FixedSize; + } else { + Y_DEBUG_ABORT_UNLESS(key->IsNull(pos)); + } + } + } + + void PlaceCell(const TPartScheme::TColumn& info, TCell value, char* keyCellsPtr) + { + if (info.IsFixed) { + Y_ABORT_UNLESS(value.Size() == info.FixedSize, "invalid fixed cell size)"); + memcpy(keyCellsPtr, value.Data(), value.Size()); + } else { + auto *ref = TDeref<TDataRef>::At(keyCellsPtr); + ref->Offset = Ptr - keyCellsPtr; + ref->Size = value.Size(); + std::copy(value.Data(), value.Data() + value.Size(), Advance(value.Size())); + } + } + + template<typename T> + void PlaceVector(TVector<T> &vector) noexcept + { + auto *dst = reinterpret_cast<T*>(Advance(Ptr, sizeof(T)*vector.size())); + std::copy(vector.begin(), vector.end(), dst); + vector.clear(); + } + + template<typename T> + T& Place() + { + return *reinterpret_cast<T*>(Advance(TPgSizeOf<T>::Value)); + } + + template<typename T> + T& Place(char*& ptr) + { + return *reinterpret_cast<T*>(Advance(ptr, TPgSizeOf<T>::Value)); + } + + void Zero(size_t size) noexcept + { + auto *from = Advance(Ptr, size); + std::fill(from, Ptr, 0); + } + + char* Advance(char*& ptr, size_t size) noexcept + { + auto newPtr = ptr + size; + Y_ABORT_UNLESS(newPtr <= End); + return std::exchange(ptr, newPtr); + } + + char* Advance(size_t size) noexcept + { + auto newPtr = Ptr + size; + Y_ABORT_UNLESS(newPtr <= End); + return std::exchange(Ptr, newPtr); + } + + public: + const TIntrusiveConstPtr<TPartScheme> Scheme; + + private: + const TGroupId GroupId; + const TPartScheme::TGroupInfo& GroupInfo; + + TVector<TString> Keys; + size_t KeysSize = 0; + + TVector<TChild> Children; + + char* Ptr = 0; + const char* End = 0; + }; + +} diff --git a/ydb/core/tablet_flat/flat_page_iface.h b/ydb/core/tablet_flat/flat_page_iface.h index d1b2386040..5adf7b7e3d 100644 --- a/ydb/core/tablet_flat/flat_page_iface.h +++ b/ydb/core/tablet_flat/flat_page_iface.h @@ -66,6 +66,7 @@ namespace NPage { GarbageStats = 10, /* Stats on garbage in historic data */ TxIdStats = 11, /* Stats for uncommitted TxIds at compaction time */ TxStatus = 12, /* Status of committed/removed transactions */ + BTreeIndex = 13, }; enum class ECodec : ui8 { diff --git a/ydb/core/tablet_flat/flat_part_scheme.h b/ydb/core/tablet_flat/flat_part_scheme.h index 71e573c811..5e7a93802c 100644 --- a/ydb/core/tablet_flat/flat_part_scheme.h +++ b/ydb/core/tablet_flat/flat_part_scheme.h @@ -7,7 +7,7 @@ #include "flat_row_nulls.h" #include "flat_part_pinout.h" -#include <library/cpp/actors/util/shared_data.h> +#include <ydb/core/base/defs.h> #include <util/generic/ptr.h> #include <util/generic/hash.h> diff --git a/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt index 98947f9779..5f7ccdb8db 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt @@ -52,6 +52,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_table_part_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/flat_test_db.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/shared_handle_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_btree_index.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_self.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt b/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt index 42bf30b3ba..38753b3d83 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt @@ -55,6 +55,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_table_part_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/flat_test_db.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/shared_handle_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_btree_index.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_self.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt index af845e1051..922c5115c7 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt @@ -56,6 +56,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_table_part_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/flat_test_db.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/shared_handle_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_btree_index.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_self.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt index ff33e86aa7..2233ba3a10 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt @@ -45,6 +45,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/flat_table_part_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/flat_test_db.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/shared_handle_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_btree_index.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_self.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp diff --git a/ydb/core/tablet_flat/ut/ut_btree_index.cpp b/ydb/core/tablet_flat/ut/ut_btree_index.cpp new file mode 100644 index 0000000000..fabcb71758 --- /dev/null +++ b/ydb/core/tablet_flat/ut/ut_btree_index.cpp @@ -0,0 +1,305 @@ +#include "flat_page_btree_index.h" +#include "flat_page_btree_index_writer.h" +#include <ydb/core/tablet_flat/test/libs/rows/layout.h> +#include <library/cpp/testing/unittest/registar.h> + +namespace NKikimr::NTable::NPage { + +namespace { + using namespace NTest; + using TChild = TBtreeIndexNode::TChild; + + TLayoutCook MakeLayout() { + TLayoutCook lay; + + lay + .Col(0, 0, NScheme::NTypeIds::Uint32) + .Col(0, 1, NScheme::NTypeIds::String) + .Col(0, 2, NScheme::NTypeIds::Bool) + .Col(0, 3, NScheme::NTypeIds::Uint64) + .Key({0, 1, 2, 3}); + + return lay; + } + + TString MakeKey(std::optional<ui32> c0 = { }, std::optional<std::string> c1 = { }, std::optional<bool> c2 = { }, std::optional<ui64> c3 = { }) { + TVector<TCell> cells; + + if (c0) { + cells.push_back(TCell::Make(c0.value())); + } else { + cells.push_back(TCell()); + } + + if (c1) { + cells.push_back(TCell(c1.value().data(), c1.value().size())); + } else { + cells.push_back(TCell()); + } + + if (c2) { + cells.push_back(TCell::Make(c2.value())); + } else { + cells.push_back(TCell()); + } + + if (c3) { + cells.push_back(TCell::Make(c3.value())); + } else { + cells.push_back(TCell()); + } + + return TSerializedCellVec::Serialize(cells); + } + + void Dump(const NPage::TBtreeIndexNode& node, const TPartScheme& scheme) noexcept + { + auto label = node.Label(); + + Cerr + << " + BTreeIndex{" << (ui16)label.Type << " rev " + << label.Format << ", " << label.Size << "b}" + << Endl; + + Cerr << node.GetChild(0).ToString() << Endl; + + for (TRecIdx i : xrange(node.GetKeysCount())) { + Cerr << "> "; + + auto cells = node.GetKeyCells(i, scheme.Groups[0].ColsKeyIdx); + for (TPos pos : xrange(cells.Count())) { + TString str; + DbgPrintValue(str, cells.Next(), scheme.Groups[0].KeyTypes[pos]); + if (str.size() > 10) { + str = str.substr(0, 10) + ".."; + } + Cerr << (pos ? ", " : "") << str; + } + + Cerr << Endl; + Cerr << node.GetChild(i + 1).ToString() << Endl; + } + + Cerr << Endl; + } + + void CheckKeys(const NPage::TBtreeIndexNode& node, const TVector<TString>& keys, const TPartScheme& scheme) { + UNIT_ASSERT_VALUES_EQUAL(node.GetKeysCount(), keys.size()); + for (TRecIdx i : xrange(node.GetKeysCount())) { + TVector<TCell> actualCells; + auto cells = node.GetKeyCells(i, scheme.Groups[0].ColsKeyIdx); + UNIT_ASSERT_VALUES_EQUAL(cells.Count(), scheme.Groups[0].ColsKeyIdx.size()); + + for (TPos pos : xrange(cells.Count())) { + Y_UNUSED(pos); + actualCells.push_back(cells.Next()); + } + + auto actual = TSerializedCellVec::Serialize(actualCells); + UNIT_ASSERT_VALUES_EQUAL(actual, keys[i]); + } + } + + void CheckChildren(const NPage::TBtreeIndexNode& node, const TVector<TChild>& children) { + UNIT_ASSERT_VALUES_EQUAL(node.GetKeysCount() + 1, children.size()); + for (TRecIdx i : xrange(node.GetKeysCount() + 1)) { + UNIT_ASSERT_EQUAL(node.GetChild(i), children[i]); + } + } +} + +Y_UNIT_TEST_SUITE(TBtreeIndexNode) { + using namespace NTest; + using TChild = TBtreeIndexNode::TChild; + + Y_UNIT_TEST(TKeyBitmap) { + TString buffer(754, 0); + auto* key = (TBtreeIndexNode::TKey*)(buffer.data()); + + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(0), 0); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(1), 1); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(4), 1); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(7), 1); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(8), 1); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(9), 2); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(256), 32); + UNIT_ASSERT_VALUES_EQUAL(key->NullBitmapLength(257), 33); + + for (TPos pos : xrange(buffer.size() * 8)) { + UNIT_ASSERT(!key->IsNull(pos)); + key->SetNull(pos); + UNIT_ASSERT(key->IsNull(pos)); + } + } + + Y_UNIT_TEST(Basics) { + TLayoutCook lay = MakeLayout(); + + TBtreeIndexNodeWriter writer(new TPartScheme(lay.RowScheme()->Cols), { }); + + TVector<TString> keys; + keys.push_back(MakeKey({ }, { }, true)); + keys.push_back(MakeKey(100, "asdf", true, 10000)); + keys.push_back(MakeKey(101)); + keys.push_back(MakeKey(101, "asdf")); + keys.push_back(MakeKey(102, "asdf")); + keys.push_back(MakeKey(102, "asdfg")); + keys.push_back(MakeKey(102, "asdfg", 1)); + keys.push_back(MakeKey(103, { }, false)); + keys.push_back(MakeKey(103, { }, true)); + keys.push_back(MakeKey(103, "x")); + keys.push_back(MakeKey(104, "asdf", true, 10000)); + keys.push_back(MakeKey(104, "asdf", true, 10001)); + keys.push_back(MakeKey(104, TString(1024*1024, 'x'), true, 10000)); + keys.push_back(MakeKey(105)); + + TVector<TChild> children; + for (ui32 i : xrange(keys.size() + 1)) { + children.push_back(TChild{i * 10, i * 100, i * 30, i * 1000}); + } + + for (auto k : keys) { + writer.AddKey(k); + } + for (auto c : children) { + writer.AddChild(c); + } + + auto serialized = writer.Finish(); + + auto node = TBtreeIndexNode(serialized); + + Dump(node, *writer.Scheme); + CheckKeys(node, keys, *writer.Scheme); + CheckChildren(node, children); + } + + Y_UNIT_TEST(OneKey) { + TLayoutCook lay = MakeLayout(); + + TBtreeIndexNodeWriter writer(new TPartScheme(lay.RowScheme()->Cols), { }); + + auto check= [&] (TString key) { + TVector<TString> keys; + keys.push_back(key); + + TVector<TChild> children; + for (ui32 i : xrange(keys.size() + 1)) { + children.push_back(TChild{i * 10, i * 100, i * 30, i * 1000}); + } + + for (auto k : keys) { + writer.AddKey(k); + } + for (auto c : children) { + writer.AddChild(c); + } + + auto serialized = writer.Finish(); + + auto node = TBtreeIndexNode(serialized); + + Dump(node, *writer.Scheme); + CheckKeys(node, keys, *writer.Scheme); + CheckChildren(node, children); + }; + + check(MakeKey(100, "asdf", true, 10000)); + check(MakeKey(100, TString(1024*1024, 'x'), true, 10000)); + check(MakeKey(100, "asdf", true, { })); + check(MakeKey(100, "asdf", { }, 10000)); + check(MakeKey(100, { }, true, 10000)); + check(MakeKey({ }, "asdf", true, 10000)); + check(MakeKey({ }, "asdf", { }, 10000)); + check(MakeKey({ }, "asdf", { }, { })); + check(MakeKey({ }, { }, { }, { })); + } + + Y_UNIT_TEST(Reusable) { + TLayoutCook lay = MakeLayout(); + + TBtreeIndexNodeWriter writer(new TPartScheme(lay.RowScheme()->Cols), { }); + + TVector<TString> keys; + keys.push_back(MakeKey(100, "asdf", true, 10000)); + keys.push_back(MakeKey(101, "xyz", true, 10000)); + keys.push_back(MakeKey(103, { }, true, 10000)); + + TVector<TChild> children; + for (ui32 i : xrange(keys.size() + 1)) { + children.push_back(TChild{i * 10, i * 100, i * 30, i * 1000}); + } + + for (auto k : keys) { + writer.AddKey(k); + } + for (auto c : children) { + writer.AddChild(c); + } + writer.Finish(); + + keys.erase(keys.begin()); + children.erase(children.begin()); + for (auto k : keys) { + writer.AddKey(k); + } + for (auto c : children) { + writer.AddChild(c); + } + + auto serialized = writer.Finish(); + + auto node = TBtreeIndexNode(serialized); + + Dump(node, *writer.Scheme); + CheckKeys(node, keys, *writer.Scheme); + CheckChildren(node, children); + } + + Y_UNIT_TEST(CutKeys) { + TLayoutCook lay = MakeLayout(); + + TBtreeIndexNodeWriter writer(new TPartScheme(lay.RowScheme()->Cols), { }); + + TVector<TString> fullKeys; + fullKeys.push_back(MakeKey({ }, { }, { }, { })); + fullKeys.push_back(MakeKey(100, { }, { }, { })); + fullKeys.push_back(MakeKey(100, "asdf", { }, { })); + fullKeys.push_back(MakeKey(100, "asdf", true, { })); + fullKeys.push_back(MakeKey(100, "asdf", true, 10000)); + + // cut keys don't have trailing nulls + TVector<TString> cutKeys; + for (ui32 i : xrange(5)) { + TVector<TCell> cells; + auto key = TSerializedCellVec(fullKeys[i]); + for (ui32 j : xrange(i)) { + cells.push_back(key.GetCells()[j]); + } + cutKeys.push_back(TSerializedCellVec::Serialize(cells)); + } + + TVector<TChild> children; + for (ui32 i : xrange(fullKeys.size() + 1)) { + children.push_back(TChild{i * 10, i * 100, i * 30, i * 1000}); + } + + for (auto k : cutKeys) { + writer.AddKey(k); + } + for (auto c : children) { + writer.AddChild(c); + } + + auto serialized = writer.Finish(); + + auto node = TBtreeIndexNode(serialized); + + Dump(node, *writer.Scheme); + CheckKeys(node, fullKeys, *writer.Scheme); + CheckChildren(node, children); + } + +} + +} diff --git a/ydb/core/tablet_flat/ut/ya.make b/ydb/core/tablet_flat/ut/ya.make index 29789db278..1dea88ef76 100644 --- a/ydb/core/tablet_flat/ut/ya.make +++ b/ydb/core/tablet_flat/ut/ya.make @@ -28,6 +28,7 @@ SRCS( flat_test_db.cpp flat_test_db_helpers.h shared_handle_ut.cpp + ut_btree_index.cpp ut_self.cpp ut_iterator.cpp ut_memtable.cpp |