aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksei Borzenkov <snaury@ydb.tech>2024-12-24 16:09:58 +0300
committerGitHub <noreply@github.com>2024-12-24 13:09:58 +0000
commit7daac538aaf884e897d9cfe6704e09417ffb4837 (patch)
tree6cee557bdb6b462a7c5c990c9b2a6eebd0641292
parentc594576c1c306b179df8cf75dddd145eca69fdc8 (diff)
downloadydb-7daac538aaf884e897d9cfe6704e09417ffb4837.tar.gz
Allow TOwnedCellVec construction directly from cellvec data (#12913)
-rw-r--r--ydb/core/scheme/scheme_tablecell.cpp279
-rw-r--r--ydb/core/scheme/scheme_tablecell.h8
-rw-r--r--ydb/core/scheme/scheme_tablecell_ut.cpp37
-rw-r--r--ydb/core/tx/locks/locks.cpp6
4 files changed, 261 insertions, 69 deletions
diff --git a/ydb/core/scheme/scheme_tablecell.cpp b/ydb/core/scheme/scheme_tablecell.cpp
index 768ee84e64..4a5e0502ab 100644
--- a/ydb/core/scheme/scheme_tablecell.cpp
+++ b/ydb/core/scheme/scheme_tablecell.cpp
@@ -8,6 +8,110 @@
namespace NKikimr {
+namespace {
+
+ struct TCellHeader {
+ static constexpr ui32 NullFlag = ui32(1) << 31;
+
+ ui32 RawValue = 0;
+
+ TCellHeader() = default;
+
+ TCellHeader(ui32 rawValue) : RawValue(rawValue) {}
+
+ TCellHeader(ui32 cellSize, bool isNull)
+ : RawValue(cellSize | (isNull ? NullFlag : 0))
+ {}
+
+ ui32 CellSize() const { return RawValue & ~NullFlag; }
+
+ bool IsNull() const { return RawValue & NullFlag; };
+ };
+
+ static_assert(sizeof(TCellHeader) == sizeof(ui32));
+
+ class TSerializedCellReader {
+ public:
+ TSerializedCellReader(std::string_view data) noexcept
+ : Ptr(data.data())
+ , Size(data.size())
+ {}
+
+ TSerializedCellReader(const char* p, size_t size) noexcept
+ : Ptr(p)
+ , Size(size)
+ {}
+
+ std::string_view Snapshot() const noexcept {
+ return std::string_view(Ptr, Size);
+ }
+
+ void Reset(std::string_view data) noexcept {
+ Ptr = data.data();
+ Size = data.size();
+ }
+
+ bool Skip(size_t size) noexcept {
+ if (Y_UNLIKELY(Size < size)) {
+ return false;
+ }
+
+ Ptr += size;
+ Size -= size;
+ return true;
+ }
+
+ bool Skip(size_t size, const char** p) noexcept {
+ if (Y_UNLIKELY(Size < size)) {
+ return false;
+ }
+
+ *p = Ptr;
+ Ptr += size;
+ Size -= size;
+ return true;
+ }
+
+ template<class T>
+ bool Read(T* dst) noexcept {
+ if (Y_UNLIKELY(Size < sizeof(T))) {
+ return false;
+ }
+
+ ::memcpy(dst, Ptr, sizeof(T));
+ Ptr += sizeof(T);
+ Size -= sizeof(T);
+ return true;
+ }
+
+ bool ReadNewCell(TCell* cell) noexcept {
+ TCellHeader header;
+ if (!Read(&header)) {
+ return false;
+ }
+
+ if (Y_UNLIKELY(Size < header.CellSize())) {
+ return false;
+ }
+
+ if (header.IsNull()) {
+ new (cell) TCell();
+ } else {
+ new (cell) TCell(Ptr, header.CellSize());
+ }
+
+ Ptr += header.CellSize();
+ Size -= header.CellSize();
+ return true;
+ }
+
+ private:
+ const char* Ptr;
+ size_t Size;
+ };
+
+} // namespace
+
void TOwnedCellVec::TData::operator delete(void* mem) noexcept {
::free(mem);
}
@@ -28,7 +132,7 @@ TOwnedCellVec::TInit TOwnedCellVec::Allocate(TOwnedCellVec::TCellVec cells) {
for (auto& x : cells) {
if (!x.IsNull() && !x.IsInline()) {
const size_t xsz = x.Size();
- size += AlignUp(xsz);
+ size += AlignUp(xsz, size_t(4));
}
}
@@ -53,7 +157,87 @@ TOwnedCellVec::TInit TOwnedCellVec::Allocate(TOwnedCellVec::TCellVec cells) {
::memcpy(ptrData, x.Data(), cellSize);
}
new (ptrCell) TCell(ptrData, cellSize);
- ptrData += AlignUp(cellSize);
+ ptrData += AlignUp(cellSize, size_t(4));
+ }
+
+ ++ptrCell;
+ }
+
+ return TInit {
+ cellvec,
+ new (mem) TData(),
+ size,
+ };
+}
+
+TOwnedCellVec::TInit TOwnedCellVec::AllocateFromSerialized(std::string_view data) {
+ if (data.empty()) {
+ // Leave the data field empty
+ return TInit{
+ TCellVec(),
+ nullptr,
+ 0,
+ };
+ }
+
+ TSerializedCellReader reader(data);
+
+ ui16 cellCount;
+ if (!reader.Read(&cellCount)) {
+ throw std::invalid_argument("cannot deserialize cellvec header");
+ }
+
+ if (cellCount == 0) {
+ // Leave the data field empty
+ return TInit{
+ TCellVec(),
+ nullptr,
+ 0,
+ };
+ }
+
+ size_t size = sizeof(TData) + sizeof(TCell) * cellCount;
+
+ auto snapshot = reader.Snapshot();
+ for (ui16 i = 0; i < cellCount; ++i) {
+ TCellHeader cellHeader;
+ if (!reader.Read(&cellHeader) || !reader.Skip(cellHeader.CellSize())) {
+ throw std::invalid_argument("cannot deserialize cell data");
+ }
+ size_t cellSize = cellHeader.CellSize();
+ if (!cellHeader.IsNull() && !TCell::CanInline(cellSize)) {
+ size += AlignUp(cellSize, size_t(4));
+ }
+ }
+
+ void* mem = ::malloc(size);
+ if (Y_UNLIKELY(!mem)) {
+ throw std::bad_alloc();
+ }
+
+ TCell* ptrCell = (TCell*)((TData*)mem + 1);
+ char* ptrData = (char*)(ptrCell + cellCount);
+
+ TConstArrayRef<TCell> cellvec(ptrCell, ptrCell + cellCount);
+
+ reader.Reset(snapshot);
+ for (ui16 i = 0; i < cellCount; ++i) {
+ TCellHeader cellHeader;
+ const char* src;
+ if (!reader.Read(&cellHeader) || !reader.Skip(cellHeader.CellSize(), &src)) {
+ Y_ABORT("Unexpected failure to deserialize cell data a second time");
+ }
+ size_t cellSize = cellHeader.CellSize();
+ if (cellHeader.IsNull()) {
+ new (ptrCell) TCell();
+ } else if (TCell::CanInline(cellSize)) {
+ new (ptrCell) TCell(src, cellSize);
+ } else {
+ if (Y_LIKELY(cellSize > 0)) {
+ ::memcpy(ptrData, src, cellSize);
+ }
+ new (ptrCell) TCell(ptrData, cellSize);
+ ptrData += AlignUp(cellSize, size_t(4));
}
++ptrCell;
@@ -125,24 +309,6 @@ bool TCellVectorsEquals::operator() (TConstArrayRef<TCell> a, TConstArrayRef<TCe
namespace {
- struct TCellHeader {
- TCellHeader() = default;
-
- TCellHeader(ui32 rawValue) : RawValue(rawValue) {}
-
- TCellHeader(ui32 cellSize, bool isNull)
- : RawValue(cellSize | (static_cast<ui32>(isNull) << 31))
- {}
-
- ui32 CellSize() const { return RawValue & ~(1ULL << 31); }
-
- bool IsNull() const { return RawValue & (1ULL << 31); };
-
- ui32 RawValue = 0;
- };
-
- static_assert(sizeof(TCellHeader) == sizeof(ui32));
-
Y_FORCE_INLINE void SerializeCellVecBody(TConstArrayRef<TCell> cells, char* resultBufferData, TVector<TCell>* resultCells) {
if (resultCells)
resultCells->resize_uninitialized(cells.size());
@@ -216,30 +382,17 @@ namespace {
SerializeCellVecBody(cells, resultBufferData, resultCells);
}
- Y_FORCE_INLINE bool TryDeserializeCellVecBody(const char* buf, const char* bufEnd, ui64 cellCount, TVector<TCell>& resultCells) {
+ Y_FORCE_INLINE bool TryDeserializeCellVecBody(std::string_view buf, ui64 cellCount, TVector<TCell>& resultCells) {
resultCells.resize_uninitialized(cellCount);
- TCell* resultCellsData = resultCells.data();
+ TCell* dst = resultCells.data();
+ TSerializedCellReader reader(buf);
for (ui64 i = 0; i < cellCount; ++i) {
- if (Y_UNLIKELY(bufEnd - buf < static_cast<ptrdiff_t>(sizeof(TCellHeader)))) {
- resultCells.clear();
- return false;
- }
-
- TCellHeader cellHeader = ReadUnaligned<TCellHeader>(buf);
- buf += sizeof(cellHeader);
-
- if (Y_UNLIKELY(bufEnd - buf < static_cast<ptrdiff_t>(cellHeader.CellSize()))) {
+ if (!reader.ReadNewCell(dst)) {
resultCells.clear();
return false;
}
-
- if (cellHeader.IsNull())
- new (resultCellsData + i) TCell();
- else
- new (resultCellsData + i) TCell(buf, cellHeader.CellSize());
-
- buf += cellHeader.CellSize();
+ ++dst;
}
return true;
@@ -249,49 +402,45 @@ namespace {
resultBuffer.clear();
resultCells.clear();
- if (data.empty())
+ if (data.empty()) {
return true;
+ }
- const char* buf = data.data();
- const char* bufEnd = data.data() + data.size();
- if (Y_UNLIKELY(bufEnd - buf < static_cast<ptrdiff_t>(sizeof(ui16))))
- return false;
+ TSerializedCellReader reader(data);
- ui16 cellCount = ReadUnaligned<ui16>(buf);
- buf += sizeof(cellCount);
+ ui16 cellCount;
+ if (!reader.Read(&cellCount)) {
+ return false;
+ }
- if (TryDeserializeCellVecBody(buf, bufEnd, cellCount, resultCells)) {
- resultBuffer = data;
- return true;
+ if (!TryDeserializeCellVecBody(reader.Snapshot(), cellCount, resultCells)) {
+ return false;
}
- return false;
+ resultBuffer = data;
+ return true;
}
Y_FORCE_INLINE bool TryDeserializeCellMatrix(const TString& data, TString& resultBuffer, TVector<TCell>& resultCells, ui32& rowCount, ui16& colCount) {
resultBuffer.clear();
resultCells.clear();
- if (data.empty())
+ if (data.empty()) {
return true;
+ }
- const char* buf = data.data();
- const char* bufEnd = data.data() + data.size();
- if (Y_UNLIKELY(bufEnd - buf < static_cast<ptrdiff_t>(sizeof(ui16))))
+ TSerializedCellReader reader(data);
+ if (!reader.Read(&rowCount) || !reader.Read(&colCount)) {
return false;
-
- rowCount = ReadUnaligned<ui32>(buf);
- buf += sizeof(rowCount);
- colCount = ReadUnaligned<ui16>(buf);
- buf += sizeof(colCount);
+ }
ui64 cellCount = (ui64)rowCount * (ui64)colCount;
- if (TryDeserializeCellVecBody(buf, bufEnd, cellCount, resultCells)) {
- resultBuffer = data;
- return true;
+ if (!TryDeserializeCellVecBody(reader.Snapshot(), cellCount, resultCells)) {
+ return false;
}
- return false;
+ resultBuffer = data;
+ return true;
}
}
@@ -419,7 +568,7 @@ void TCellsStorage::Reset(TArrayRef<const TCell> cells)
for (size_t i = 0; i < cellsSize; ++i) {
const auto & cell = cells[i];
if (!cell.IsNull() && !cell.IsInline() && cell.Size() != 0) {
- cellsDataSize += AlignUp(static_cast<size_t>(cell.Size()));
+ cellsDataSize += AlignUp(static_cast<size_t>(cell.Size()), size_t(4));
}
}
@@ -435,7 +584,7 @@ void TCellsStorage::Reset(TArrayRef<const TCell> cells)
if (!cell.IsNull() && !cell.IsInline() && cell.Size() != 0) {
memcpy(cellsData, cell.Data(), cell.Size());
Cells[i] = TCell(cellsData, cell.Size());
- cellsData += AlignUp(static_cast<size_t>(cell.Size()));
+ cellsData += AlignUp(static_cast<size_t>(cell.Size()), size_t(4));
} else {
Cells[i] = cell;
}
@@ -473,7 +622,7 @@ size_t TOwnedCellVecBatch::Append(TConstArrayRef<TCell> cells) {
::memcpy(ptrData, cell.Data(), cellSize);
}
new (ptrCell) TCell(ptrData, cellSize);
- ptrData += AlignUp(cellSize);
+ ptrData += AlignUp(cellSize, size_t(4));
}
++ptrCell;
diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h
index f0bf5f7fd8..a4cb6a4c34 100644
--- a/ydb/core/scheme/scheme_tablecell.h
+++ b/ydb/core/scheme/scheme_tablecell.h
@@ -176,7 +176,7 @@ inline size_t EstimateSize(TCellsRef cells) {
for (auto& cell : cells) {
if (!cell.IsNull() && !cell.IsInline()) {
const size_t cellSize = cell.Size();
- size += AlignUp(cellSize);
+ size += AlignUp(cellSize, size_t(4));
}
}
@@ -439,6 +439,8 @@ private:
static TInit Allocate(TCellVec cells);
+ static TInit AllocateFromSerialized(std::string_view data);
+
TCellVec& CellVec() {
return static_cast<TCellVec&>(*this);
}
@@ -457,6 +459,10 @@ public:
return TOwnedCellVec(Allocate(cells));
}
+ static TOwnedCellVec FromSerialized(std::string_view data) {
+ return TOwnedCellVec(AllocateFromSerialized(data));
+ }
+
TOwnedCellVec(const TOwnedCellVec& rhs) noexcept
: TCellVec(rhs)
, Data(rhs.Data)
diff --git a/ydb/core/scheme/scheme_tablecell_ut.cpp b/ydb/core/scheme/scheme_tablecell_ut.cpp
index b75e288bcf..fcc21a16cd 100644
--- a/ydb/core/scheme/scheme_tablecell_ut.cpp
+++ b/ydb/core/scheme/scheme_tablecell_ut.cpp
@@ -63,6 +63,43 @@ Y_UNIT_TEST_SUITE(Scheme) {
UNIT_ASSERT_VALUES_EQUAL(moved[1].AsBuf(), TStringBuf(bigStrVal, sizeof(bigStrVal)));
}
+ Y_UNIT_TEST(OwnedCellVecFromSerialized) {
+ TOwnedCellVec empty1 = TOwnedCellVec::FromSerialized(TString());
+ UNIT_ASSERT_VALUES_EQUAL(empty1.size(), 0u);
+
+ TOwnedCellVec empty2 = TOwnedCellVec::FromSerialized(TString("\x00\x00", 2));
+ UNIT_ASSERT_VALUES_EQUAL(empty2.size(), 0u);
+
+ // Test truncated buffers don't cause buffer overflows
+ UNIT_ASSERT_EXCEPTION(TOwnedCellVec::FromSerialized(TString("\x00", 1)), std::invalid_argument);
+ UNIT_ASSERT_EXCEPTION(TOwnedCellVec::FromSerialized(TString("\x01\x00", 2)), std::invalid_argument);
+ UNIT_ASSERT_EXCEPTION(TOwnedCellVec::FromSerialized(TString("\x01\x00\x00\x00\x00", 5)), std::invalid_argument);
+ UNIT_ASSERT_EXCEPTION(TOwnedCellVec::FromSerialized(TString("\x01\x00\x04\x00\x00\x00xyz", 9)), std::invalid_argument);
+
+ TOwnedCellVec emptyCell = TOwnedCellVec::FromSerialized(TString("\x01\x00\x00\x00\x00\x00", 6));
+ UNIT_ASSERT_VALUES_EQUAL(emptyCell.size(), 1u);
+ UNIT_ASSERT_VALUES_EQUAL(emptyCell[0].IsNull(), false);
+ UNIT_ASSERT_VALUES_EQUAL(emptyCell[0].AsBuf(), TStringBuf());
+
+ TOwnedCellVec nullCell = TOwnedCellVec::FromSerialized(TString("\x01\x00\x00\x00\x00\x80", 6));
+ UNIT_ASSERT_VALUES_EQUAL(nullCell.size(), 1u);
+ UNIT_ASSERT_VALUES_EQUAL(nullCell[0].IsNull(), true);
+
+ TOwnedCellVec twoCells = TOwnedCellVec::FromSerialized(TString("\x02\x00\x03\x00\x00\x00xyz\x03\x00\x00\x00uvw", 16));
+ UNIT_ASSERT_VALUES_EQUAL(twoCells.size(), 2u);
+ UNIT_ASSERT_VALUES_EQUAL(twoCells[0].AsBuf(), TStringBuf("xyz"));
+ UNIT_ASSERT_VALUES_EQUAL(twoCells[1].AsBuf(), TStringBuf("uvw"));
+
+ TOwnedCellVec twoLargeCells = TOwnedCellVec::FromSerialized(TString("\x02\x00\x23\x00\x00\x00zyxwvutsrqponmlkjihgfedcba987654321\x23\x00\x00\x00ZYXWVUTSRQPONMLKJIHGFEDCBA987654321", 80));
+ UNIT_ASSERT_VALUES_EQUAL(twoLargeCells.size(), 2u);
+ UNIT_ASSERT_VALUES_EQUAL(twoLargeCells[0].AsBuf(), TStringBuf("zyxwvutsrqponmlkjihgfedcba987654321"));
+ UNIT_ASSERT_VALUES_EQUAL(twoLargeCells[1].AsBuf(), TStringBuf("ZYXWVUTSRQPONMLKJIHGFEDCBA987654321"));
+ // Ensure cell data is aligned (36 bytes per cell instead of 35)
+ UNIT_ASSERT_VALUES_EQUAL(twoLargeCells.DataSize(), 8 + sizeof(TCell) * 2 + 36 * 2);
+ UNIT_ASSERT_VALUES_EQUAL(uintptr_t(twoLargeCells[0].Data()) & 3, 0);
+ UNIT_ASSERT_VALUES_EQUAL(uintptr_t(twoLargeCells[1].Data()) & 3, 0);
+ }
+
Y_UNIT_TEST(TSerializedCellVec) {
ui64 intVal = 42;
char smallStrVal[] = "str1";
diff --git a/ydb/core/tx/locks/locks.cpp b/ydb/core/tx/locks/locks.cpp
index 15c2857ac2..d31b1734e5 100644
--- a/ydb/core/tx/locks/locks.cpp
+++ b/ydb/core/tx/locks/locks.cpp
@@ -44,7 +44,7 @@ ILocksDb::TLockRange TPointKey::ToSerializedLockRange() const {
bool TPointKey::ParseSerializedLockRange(const ILocksDb::TLockRange& range) {
if (range.Data) {
- Key = TOwnedCellVec::Make(TSerializedCellVec(range.Data).GetCells());
+ Key = TOwnedCellVec::FromSerialized(range.Data);
}
return true;
}
@@ -80,10 +80,10 @@ bool TRangeKey::ParseSerializedLockRange(const ILocksDb::TLockRange& range) {
return false;
}
if (protoRange.HasFrom()) {
- From = TOwnedCellVec::Make(TSerializedCellVec(protoRange.GetFrom()).GetCells());
+ From = TOwnedCellVec::FromSerialized(protoRange.GetFrom());
}
if (protoRange.HasTo()) {
- To = TOwnedCellVec::Make(TSerializedCellVec(protoRange.GetTo()).GetCells());
+ To = TOwnedCellVec::FromSerialized(protoRange.GetTo());
}
InclusiveFrom = protoRange.GetFromInclusive();
InclusiveTo = protoRange.GetToInclusive();