aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author kungasc <kungasc@yandex-team.com> 2023-08-12 16:04:08 +0300
committer kungasc <kungasc@yandex-team.com> 2023-08-12 16:30:53 +0300
commit 8ea06a8e6fcc59144afa5076beb47fb580837e3d (patch)
tree 2ce45080f21bace8409a38d84aba3959b4f5ef21
parent 43534adff748eb2e50440cdcd0b0a87835ac5cde (diff)
download ydb-8ea06a8e6fcc59144afa5076beb47fb580837e3d.tar.gz
KIKIMR-18845 Cut keys option
-rw-r--r-- ydb/core/tablet_flat/flat_page_conf.h 1
-rw-r--r-- ydb/core/tablet_flat/flat_page_index.h 2
-rw-r--r-- ydb/core/tablet_flat/flat_part_dump.cpp 5
-rw-r--r-- ydb/core/tablet_flat/flat_part_iter_multi.h 4
-rw-r--r-- ydb/core/tablet_flat/flat_part_writer.h 102
-rw-r--r-- ydb/core/tablet_flat/test/libs/rows/rows.h 5
-rw-r--r-- ydb/core/tablet_flat/ut/ut_part.cpp 344
7 files changed, 448 insertions, 15 deletions
diff --git a/ydb/core/tablet_flat/flat_page_conf.h b/ydb/core/tablet_flat/flat_page_conf.h
index 32255732ea..b275633fa1 100644
--- a/ydb/core/tablet_flat/flat_page_conf.h
+++ b/ydb/core/tablet_flat/flat_page_conf.h
@@ -78,6 +78,7 @@ namespace NPage {
}
bool Final = true;
+ bool CutIndexKeys = false;
ui32 MaxLargeBlob = 8 * 1024 * 1024 - 8; /* Maximum large blob size */
ui32 LargeEdge = Max<ui32>(); /* External blob edge size */
ui32 SmallEdge = Max<ui32>(); /* Outer blobs edge bytes limit */
diff --git a/ydb/core/tablet_flat/flat_page_index.h b/ydb/core/tablet_flat/flat_page_index.h
index 61c5b5c3c8..362d63a298 100644
--- a/ydb/core/tablet_flat/flat_page_index.h
+++ b/ydb/core/tablet_flat/flat_page_index.h
@@ -190,7 +190,7 @@ namespace NPage {
* Lookup a page that may contain key with specified seek mode
*
* Returns end iterator when there is definitely no such page,
- * otherwise the result is exact given such a key exists.
+ * otherwise the result is approximate and may be off by one page.
*/
TIter LookupKeyReverse(
TCells key, const TPartScheme::TGroupInfo &group,
diff --git a/ydb/core/tablet_flat/flat_part_dump.cpp b/ydb/core/tablet_flat/flat_part_dump.cpp
index 40b38e6651..7c923e2954 100644
--- a/ydb/core/tablet_flat/flat_part_dump.cpp
+++ b/ydb/core/tablet_flat/flat_part_dump.cpp
@@ -92,7 +92,7 @@ namespace {
auto label = part.Index.Label();
- const auto items = (part.Index->End() - part.Index->Begin());
+ const auto items = (part.Index->End() - part.Index->Begin() + 1);
Out
<< " + Index{" << (ui16)label.Type << " rev "
@@ -110,7 +110,7 @@ namespace {
ssize_t seen = 0;
- for (auto iter = part.Index->Begin(); iter; ++iter) {
+ for (ssize_t i = 0; i < items; i++) {
Key.clear();
if (depth < 2 && (seen += 1) > 10) {
@@ -121,6 +121,7 @@ namespace {
break;
}
+ auto iter = i == items - 1 ? part.Index.GetLastKeyRecord() : (part.Index->Begin() + i).GetRecord();
for (const auto &info: part.Scheme->Groups[0].ColsKeyIdx)
Key.push_back(iter->Cell(info));
diff --git a/ydb/core/tablet_flat/flat_part_iter_multi.h b/ydb/core/tablet_flat/flat_part_iter_multi.h
index 972b781009..dfd5bf1a1d 100644
--- a/ydb/core/tablet_flat/flat_part_iter_multi.h
+++ b/ydb/core/tablet_flat/flat_part_iter_multi.h
@@ -254,9 +254,7 @@ namespace NTable {
}
if (seek != ESeek::Exact && Index.Off() > 0) {
- // The row we seek is on the previous page
- // N.B. actually this should never be triggered,
- // since reverse search should always have exact==true
+ // The row we seek is on the next page
RowId = Index->GetRowId() - 1;
--Index;
Y_VERIFY_DEBUG(RowId < EndRowId,
diff --git a/ydb/core/tablet_flat/flat_part_writer.h b/ydb/core/tablet_flat/flat_part_writer.h
index 3c8f3aae6d..1854916693 100644
--- a/ydb/core/tablet_flat/flat_part_writer.h
+++ b/ydb/core/tablet_flat/flat_part_writer.h
@@ -40,6 +40,7 @@ namespace NTable {
TPartWriter(TIntrusiveConstPtr<TPartScheme> scheme, TTagsRef tags, IPageWriter& pager,
const NPage::TConf &conf, TEpoch epoch)
: Final(conf.Final)
+ , CutIndexKeys(conf.CutIndexKeys)
, SmallEdge(conf.SmallEdge)
, LargeEdge(conf.LargeEdge)
, MaxLargeBlob(conf.MaxLargeBlob)
@@ -557,6 +558,8 @@ namespace NTable {
Phase = 0;
Current = { };
+
+ Y_VERIFY(!PrevPageLastKey);
}
}
@@ -701,9 +704,15 @@ namespace NTable {
if (groupId.IsMain()) {
Y_VERIFY_DEBUG(NextSliceFirstRowId != Max<TRowId>());
- InitKey(dataPage->Record(0), groupId);
+ InitKey(Key, dataPage->Record(0), groupId);
+
+ if (CutIndexKeys) {
+ CutKey(groupId);
+ }
} else if (groupId.Index == 0) {
- InitKey(dataPage->Record(0), groupId);
+ // TODO: Call CutKey here too, but don't touch MVCC columns
+
+ InitKey(Key, dataPage->Record(0), groupId);
} else {
Key.clear();
}
@@ -721,23 +730,32 @@ namespace NTable {
auto page = WritePage(raw, EPage::DataPage, groupId.Index);
// N.B. non-main groups have no key
- Y_VERIFY_DEBUG(g.Index.CalcSize(Key) == g.FirstKeyIndexSize);
+ if (CutIndexKeys) {
+ Y_VERIFY_DEBUG(g.Index.CalcSize(Key) <= g.FirstKeyIndexSize);
+ } else {
+ Y_VERIFY_DEBUG(g.Index.CalcSize(Key) == g.FirstKeyIndexSize);
+ }
g.Index.Add(g.FirstKeyIndexSize, Key, dataPage.BaseRow(), page);
+ if (CutIndexKeys && groupId.IsMain()) {
+ InitKey(PrevPageLastKey, dataPage->Record(dataPage->Count - 1), groupId);
+ }
+
// N.B. hack to save the last row/key for the main group
// SliceSize is wrong, but it's a hack for tests right now
if (groupId.IsMain() && (NextSliceForce || Phase == 1 || Current.Bytes - LastSliceBytes >= SliceSize)) {
NextSliceForce = false;
TRowId lastRowId = dataPage.BaseRow() + dataPage->Count - 1;
- InitKey(dataPage->Record(dataPage->Count - 1), groupId);
+ InitKey(Key, dataPage->Record(dataPage->Count - 1), groupId);
SaveSlice(lastRowId, TSerializedCellVec(Key));
if (Phase == 1) {
Y_VERIFY_DEBUG(g.Index.CalcSize(Key) == g.LastKeyIndexSize);
g.Index.Add(g.LastKeyIndexSize, Key, lastRowId, page);
- ++Phase;
+ Y_VERIFY(std::exchange(Phase, 2) == 1);
+ PrevPageLastKey.clear(); // new index will be started
}
}
@@ -846,15 +864,79 @@ namespace NTable {
}
}
- void InitKey(const NPage::TDataPage::TRecord* record, NPage::TGroupId groupId) noexcept
+ void InitKey(TStackVec<TCell, 16>& key, const NPage::TDataPage::TRecord* record, NPage::TGroupId groupId) noexcept // fills `key` with the key cells of `record`, using the layout of `groupId`
{
const auto& layout = Scheme->GetLayout(groupId);
- Key.resize(layout.ColsKeyData.size());
+ key.resize(layout.ColsKeyData.size()); // one slot per entry of the layout's ColsKeyData
for (const auto &info: layout.ColsKeyData) {
- Key[info.Key] = record->Cell(info);
+ key[info.Key] = record->Cell(info); // info.Key selects the slot of this column inside the key
}
}
+ void CutKey(NPage::TGroupId groupId) noexcept
+ {
+     // Shortens Key relative to PrevPageLastKey: columns up to and including the first
+     // differing one are kept (a differing string column is truncated right after its
+     // first differing byte, or after the whole character for Utf8), and all following
+     // columns become null cells. Does nothing when there is no PrevPageLastKey.
+     if (!PrevPageLastKey) {
+         return;
+     }
+
+     Y_VERIFY(PrevPageLastKey.size() == Key.size());
+     const auto& layout = Scheme->GetLayout(groupId);
+
+     // Position of the first key column in which the two keys differ
+     TPos it = 0;
+     while (it < Key.size() && CompareTypedCells(PrevPageLastKey[it], Key[it], layout.KeyTypes[it]) == 0) {
+         it++;
+     }
+     Y_VERIFY(it < Key.size(), "All keys should be different");
+
+     // NOTE(review): Columns is indexed with a key position here - confirm it lines up with KeyTypes
+     if (!layout.Columns[it].IsFixed && IsCharPointerType(layout.KeyTypes[it].GetTypeId())) {
+         const auto &prevCell = PrevPageLastKey[it];
+         const auto &cell = Key[it];
+         Y_VERIFY(!cell.IsNull(), "Keys should be in ascending order");
+
+         // Skip the bytes both cells have in common
+         const size_t common = Min(prevCell.Size(), cell.Size());
+         size_t index = 0;
+         while (index < common && prevCell.AsBuf()[index] == cell.AsBuf()[index]) {
+             index++;
+         }
+         index++; // last taken symbol
+
+         if (layout.KeyTypes[it].GetTypeId() == NKikimr::NScheme::NTypeIds::Utf8) {
+             // Also take UTF-8 continuation bytes (10xxxxxx) so the cut never splits a character
+             while (index < cell.Size() && (static_cast<unsigned char>(cell.AsBuf()[index]) >> 6) == 2) {
+                 index++;
+             }
+         }
+
+         if (index < cell.Size()) {
+             Key[it] = TCell(cell.Data(), index);
+         }
+     }
+
+     for (it++; it < Key.size(); it++) {
+         Key[it] = TCell();
+     }
+ }
+
+ constexpr bool IsCharPointerType(NKikimr::NScheme::TTypeId typeId) {
+     // Tells whether a key column of this type is one of the string types listed below.
+     // Note: we don't cut Json/Yson/JsonDocument/DyNumber as will lead to invalid shard bounds
+     const bool isByteString =
+         typeId == NKikimr::NScheme::NTypeIds::String
+         || typeId == NKikimr::NScheme::NTypeIds::String4k
+         || typeId == NKikimr::NScheme::NTypeIds::String2m;
+
+     const bool isUtf8 = typeId == NKikimr::NScheme::NTypeIds::Utf8;
+
+     return isByteString || isUtf8;
+ }
+
void SaveSlice(TRowId lastRowId, TSerializedCellVec lastKey) noexcept
{
Y_VERIFY(NextSliceFirstRowId != Max<TRowId>());
@@ -875,6 +957,7 @@ namespace NTable {
private:
const bool Final = false;
+ const bool CutIndexKeys;
const ui32 SmallEdge;
const ui32 LargeEdge;
const ui32 MaxLargeBlob;
@@ -895,7 +978,8 @@ namespace NTable {
THolder<NBloom::IWriter> ByKey;
TWriteStats WriteStats;
TStackVec<TCell, 16> Key;
- ui32 Phase = 0;
+ TStackVec<TCell, 16> PrevPageLastKey;
+ ui32 Phase = 0; // 0 - writing rows, 1 - flushing current page collection, 2 - flushed current page collection
struct TRegisteredGlob {
TRowId Row;
diff --git a/ydb/core/tablet_flat/test/libs/rows/rows.h b/ydb/core/tablet_flat/test/libs/rows/rows.h
index 8bde29ceac..8a16aa4c66 100644
--- a/ydb/core/tablet_flat/test/libs/rows/rows.h
+++ b/ydb/core/tablet_flat/test/libs/rows/rows.h
@@ -120,6 +120,11 @@ namespace NTest {
return Put(tag, TTypeFor<TString>::Type, buf.data(), buf.size());
}
+ TRow& Do(NTable::TTag tag, const TString &buf, TType type)
+ {
+ return Put(tag, type, buf.data(), buf.size()); // forwards buf's bytes to Put with the caller-supplied type
+ }
+
TRow& Do(NTable::TTag tag, const NPageCollection::TGlobId &glob)
{
auto *data = static_cast<const void*>(&glob);
diff --git a/ydb/core/tablet_flat/ut/ut_part.cpp b/ydb/core/tablet_flat/ut/ut_part.cpp
index 248e0fd6b5..d6fbd3268e 100644
--- a/ydb/core/tablet_flat/ut/ut_part.cpp
+++ b/ydb/core/tablet_flat/ut/ut_part.cpp
@@ -607,6 +607,350 @@ Y_UNIT_TEST_SUITE(TPart) {
auto cooked2 = TCompaction(new TForwardEnv(512, 1024), conf).Do(subset);
}
+ Y_UNIT_TEST(CutKeys_Seek)
+ {
+     // Cooks the same rows (3 per page) with and without CutIndexKeys and checks that seeks land on the same row ids
+     TLayoutCook lay;
+     lay
+         .Col(0, 0, NScheme::NTypeIds::Uint32)
+         .Col(0, 1, NScheme::NTypeIds::String)
+         .Key({0, 1});
+
+     TVector<std::pair<ui32, TString>> fullRows = {
+         {1, "aaa"}, // -> (1, "aaa")
+         {1, "aab"},
+         {1, "aac"},
+
+         {1, "baaaa"}, // -> (1, "b")
+         {1, "bab"},
+         {1, "caa"},
+
+         {2, "aaa"}, // -> (2, null)
+         {2, "bbb"},
+         {2, "ccc"},
+
+         {2, "ccx"}, // -> (2, "ccx")
+         {2, "cxy"},
+         {2, "cxz"}, // -> (2, "cxz")
+     };
+
+     NPage::TConf cutConf{ true, 8192 }, fullConf{ true, 8192 };
+     cutConf.CutIndexKeys = true;
+     fullConf.CutIndexKeys = false;
+     cutConf.Group(0).PageRows = fullConf.Group(0).PageRows = 3;
+
+     TPartCook cutCook(lay, cutConf), cutCookR(lay, cutConf), fullCook(lay, fullConf), fullCookR(lay, fullConf);
+     for (auto r : fullRows) {
+         cutCook.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         cutCookR.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullCook.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullCookR.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+     }
+
+     TCheckIt cutWrap(cutCook.Finish(), { }), fullWrap(fullCook.Finish(), { });
+     TCheckReverseIt cutWrapR(cutCookR.Finish(), { }), fullWrapR(fullCookR.Finish(), { });
+
+     const auto cutPart = (*cutWrap).Eggs.Lone();
+     const auto fullPart = (*fullWrap).Eggs.Lone();
+
+     Cerr << "======= CUT =======" << Endl;
+     Cerr << DumpPart(*cutPart, 2) << Endl;
+     Cerr << "======= FULL =======" << Endl;
+     Cerr << DumpPart(*fullPart, 2) << Endl;
+
+     UNIT_ASSERT_GT(fullPart->IndexesRawSize, cutPart->IndexesRawSize);
+
+     const NPage::TCompare<NPage::TIndex::TRecord> cmp(cutPart->Scheme->Groups[0].ColsKeyIdx, *(*lay).Keys);
+     UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*cutPart->Index->Begin(), TRowTool(*lay).KeyCells(*TSchemedCookRow(*lay).Col(1u, "aaa"))), 0);
+     UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*(cutPart->Index->Begin() + 1), TRowTool(*lay).KeyCells(*TSchemedCookRow(*lay).Col(1u, "b"))), 0);
+     UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*(cutPart->Index->Begin() + 2), TRowTool(*lay).KeyCells(*TSchemedCookRow(*lay).Col(2u, nullptr))), 0);
+     UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*(cutPart->Index->Begin() + 3), TRowTool(*lay).KeyCells(*TSchemedCookRow(*lay).Col(2u, "ccx"))), 0);
+     UNIT_ASSERT_VALUES_EQUAL(cmp.Compare(*cutPart->Index.GetLastKeyRecord(), TRowTool(*lay).KeyCells(*TSchemedCookRow(*lay).Col(2u, "cxz"))), 0);
+
+     for (auto r : fullRows) {
+         cutWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));
+     }
+
+     for (size_t rowId = 0; rowId < fullRows.size(); rowId++)
+     for (auto seekMode : {ESeek::Exact, ESeek::Lower, ESeek::Upper})
+     for (auto transformMode : {ESeek::Exact, ESeek::Lower, ESeek::Upper}) {
+         auto str = fullRows[rowId].second;
+
+         switch (transformMode) {
+         case ESeek::Exact: // seek with the stored string itself
+             break;
+         case ESeek::Lower: // seek with a string that sorts before the stored one
+             str[str.size() - 1] = '#';
+             UNIT_ASSERT_LT(str, fullRows[rowId].second);
+             break;
+         case ESeek::Upper: // seek with a string that sorts after the stored one
+             str += '#';
+             UNIT_ASSERT_GT(str, fullRows[rowId].second);
+             break;
+         }
+
+         cutWrap.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         fullWrap.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         UNIT_ASSERT_VALUES_EQUAL(cutWrap->GetRowId(), fullWrap->GetRowId());
+         // Same comparison for the reverse iterators (must read the reverse wrappers, not the forward ones)
+         cutWrapR.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         fullWrapR.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         UNIT_ASSERT_VALUES_EQUAL(cutWrapR->GetRowId(), fullWrapR->GetRowId());
+     }
+ }
+
+ Y_UNIT_TEST(CutKeys_SeekPages)
+ {
+     // Same comparison as CutKeys_Seek, but with one row per page so that every row gets its own index key
+     TLayoutCook lay;
+     lay
+         .Col(0, 0, NScheme::NTypeIds::Uint32)
+         .Col(0, 1, NScheme::NTypeIds::String)
+         .Key({0, 1});
+
+     TVector<std::pair<ui32, TString>> fullRows = {
+         {1, "aaa"}, // -> (1, "aaa")
+         {1, "aba"}, // -> (1, "ab")
+         {1, "aca"}, // -> (1, "ac")
+         {1, "baa"}, // -> (1, "b")
+         {1, "bba"}, // -> (1, "bb")
+         {2, "aaa"}, // -> (2, null)
+         {2, "aba"}, // -> (2, "ab")
+         {2, "aca"}, // -> (2, "ac")
+         {2, "baa"}, // -> (2, "b")
+         {2, "bba"}, // -> (2, "bba")
+     };
+
+     NPage::TConf cutConf{ true, 8192 }, fullConf{ true, 8192 };
+     cutConf.CutIndexKeys = true;
+     fullConf.CutIndexKeys = false;
+     cutConf.Group(0).PageRows = fullConf.Group(0).PageRows = 1;
+
+     TPartCook cutCook(lay, cutConf), cutCookR(lay, cutConf), fullCook(lay, fullConf), fullCookR(lay, fullConf);
+     for (auto r : fullRows) {
+         cutCook.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         cutCookR.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullCook.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullCookR.Add(*TSchemedCookRow(*lay).Col(r.first, r.second));
+     }
+
+     TCheckIt cutWrap(cutCook.Finish(), { }), fullWrap(fullCook.Finish(), { });
+     TCheckReverseIt cutWrapR(cutCookR.Finish(), { }), fullWrapR(fullCookR.Finish(), { });
+
+     const auto cutPart = (*cutWrap).Eggs.Lone();
+     const auto fullPart = (*fullWrap).Eggs.Lone();
+
+     Cerr << "======= CUT =======" << Endl;
+     Cerr << DumpPart(*cutPart, 2) << Endl;
+     Cerr << "======= FULL =======" << Endl;
+     Cerr << DumpPart(*fullPart, 2) << Endl;
+
+     UNIT_ASSERT_GT(fullPart->IndexesRawSize, cutPart->IndexesRawSize);
+
+     for (auto r : fullRows) {
+         cutWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));
+         fullWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));
+     }
+
+     for (size_t rowId = 0; rowId < fullRows.size(); rowId++)
+     for (auto seekMode : {ESeek::Exact, ESeek::Lower, ESeek::Upper})
+     for (auto transformMode : {ESeek::Exact, ESeek::Lower, ESeek::Upper}) {
+         auto str = fullRows[rowId].second;
+
+         switch (transformMode) {
+         case ESeek::Exact: // seek with the stored string itself
+             break;
+         case ESeek::Lower: // seek with a string that sorts before the stored one
+             str[str.size() - 1] = '#';
+             UNIT_ASSERT_LT(str, fullRows[rowId].second);
+             break;
+         case ESeek::Upper: // seek with a string that sorts after the stored one
+             str += '#';
+             UNIT_ASSERT_GT(str, fullRows[rowId].second);
+             break;
+         }
+
+         cutWrap.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         fullWrap.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         UNIT_ASSERT_VALUES_EQUAL(cutWrap->GetRowId(), fullWrap->GetRowId());
+         // Same comparison for the reverse iterators (must read the reverse wrappers, not the forward ones)
+         cutWrapR.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         fullWrapR.Seek(*TSchemedCookRow(*lay).Col(fullRows[rowId].first, str), seekMode);
+         UNIT_ASSERT_VALUES_EQUAL(cutWrapR->GetRowId(), fullWrapR->GetRowId());
+     }
+ }
+
+ Y_UNIT_TEST(CutKeys_CutString)
+ {
+     // Each check cooks two one-row pages (a, then b) and verifies the String key stored in the second index record
+     TLayoutCook lay;
+     lay
+         .Col(0, 0, NScheme::NTypeIds::String)
+         .Key({0});
+
+     NPage::TConf conf{ true, 8192 };
+     conf.CutIndexKeys = true;
+     conf.Group(0).PageRows = 1;
+
+     auto check = [&] (ui32 testId, TString a, TString b, TString expected) {
+         TPartCook cook(lay, conf);
+
+         cook.Add(*TSchemedCookRow(*lay).Col(a == "<NULL>" ? nullptr : a)); // "<NULL>" stands for a null cell
+         cook.Add(*TSchemedCookRow(*lay).Col(b));
+
+         TCheckIt wrap(cook.Finish(), { });
+
+         const auto part = (*wrap).Eggs.Lone();
+
+         Cerr << DumpPart(*part, 2) << Endl;
+
+         TString actual((part->Index->Begin() + 1).GetRecord()->Cell(part->Scheme->Groups[0].ColsKeyIdx[0]).AsBuf());
+         UNIT_ASSERT_VALUES_EQUAL_C(actual, expected, testId << ": '" << a << "', '" << b << "'");
+     };
+
+     check(0,
+         "cccccc",
+         "ccccccd",
+         "ccccccd");
+
+     check(1,
+         "cccccc",
+         "ccccccddd",
+         "ccccccd");
+
+     check(2,
+         "cccccc",
+         "cccccd",
+         "cccccd");
+
+     check(3,
+         "cccccc",
+         "cccccddd",
+         "cccccd");
+
+     check(4,
+         "cccccc",
+         "ccccd",
+         "ccccd");
+
+     check(5,
+         "cccccc",
+         "ccccddd",
+         "ccccd");
+
+     check(6,
+         "cccccc",
+         "cccd",
+         "cccd");
+
+     check(7,
+         "cccccc",
+         "cccddd",
+         "cccd");
+
+     check(8,
+         "cccccc",
+         "d",
+         "d");
+
+     check(9,
+         "cccccc",
+         "ddd",
+         "d");
+
+     check(10,
+         "",
+         "d",
+         "d");
+
+     check(11,
+         "",
+         "ddd",
+         "d");
+
+     check(12,
+         "<NULL>",
+         "d",
+         "d");
+
+     check(13,
+         "<NULL>",
+         "ddd",
+         "d");
+
+     check(14,
+         TString(100, '_') + "cccddd",
+         TString(100, '_') + "cddddd",
+         TString(100, '_') + "cd");
+ }
+
+ Y_UNIT_TEST(CutKeys_CutUtf8String)
+ {
+     // Each check cooks two one-row pages (a, then b) and verifies the Utf8 key stored in the second index record
+     TLayoutCook lay;
+     lay
+         .Col(0, 0, NScheme::NTypeIds::Utf8)
+         .Key({0});
+
+     NPage::TConf conf{ true, 8192 };
+     conf.CutIndexKeys = true;
+     conf.Group(0).PageRows = 1;
+
+     auto check = [&] (ui32 testId, TString a, TString b, TString expected) {
+         TPartCook cook(lay, conf);
+
+         TRow rowA, rowB;
+         rowA.Do(0, a, NScheme::NTypeIds::Utf8);
+         rowB.Do(0, b, NScheme::NTypeIds::Utf8);
+         cook.Add(rowA);
+         cook.Add(rowB);
+
+         TCheckIt wrap(cook.Finish(), { });
+
+         const auto part = (*wrap).Eggs.Lone();
+
+         Cerr << DumpPart(*part, 2) << Endl;
+
+         TString actual((part->Index->Begin() + 1).GetRecord()->Cell(part->Scheme->Groups[0].ColsKeyIdx[0]).AsBuf());
+         UNIT_ASSERT_VALUES_EQUAL_C(actual, expected, testId << ": '" << a << "', '" << b << "'");
+     };
+
+     check(0,
+         "cccccc",
+         "cccddd",
+         "cccd");
+
+     check(1,
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abc🉑🉑🉑", // \xF0\x9F\x89\x91
+         "abc🉑");
+
+     check(2,
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abc⚫⚫⚫", // \xE2\x9A\xAB
+         "abc⚫");
+
+     check(3,
+         "abc⚫⚫⚫", // \xE2\x9A\xAB
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abc😔");
+
+     check(4,
+         "abcxxx",
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abc😔");
+
+     check(5,
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abcxxx",
+         "abcx");
+
+     check(6,
+         "abc😔😔😔", // \xF0\x9F\x98\x94
+         "abc😖😖😖", // \xF0\x9F\x98\x96
+         "abc😖");
+ }
}
}