about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/on_disk/chunks/chunked_helpers.h
diff options
context:
space:
mode:
author denplusplus <denplusplus@yandex-team.ru> 2022-02-10 16:47:34 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:34 +0300
commit addb3626ed629a8c7d9c8c30e87365b478a8c266 (patch)
tree c0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/on_disk/chunks/chunked_helpers.h
parent 57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (diff)
download ydb-addb3626ed629a8c7d9c8c30e87365b478a8c266.tar.gz
Restoring authorship annotation for <denplusplus@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/on_disk/chunks/chunked_helpers.h')
-rw-r--r-- library/cpp/on_disk/chunks/chunked_helpers.h 604
1 files changed, 302 insertions, 302 deletions
diff --git a/library/cpp/on_disk/chunks/chunked_helpers.h b/library/cpp/on_disk/chunks/chunked_helpers.h
index fa7febf080..5fa96afdca 100644
--- a/library/cpp/on_disk/chunks/chunked_helpers.h
+++ b/library/cpp/on_disk/chunks/chunked_helpers.h
@@ -1,114 +1,114 @@
#pragma once
-
+
#include <util/generic/vector.h>
-#include <util/generic/buffer.h>
+#include <util/generic/buffer.h>
#include <util/generic/hash_set.h>
-#include <util/generic/cast.h>
+#include <util/generic/cast.h>
#include <util/generic/ymath.h>
#include <util/memory/blob.h>
#include <util/stream/buffer.h>
-#include <util/stream/mem.h>
+#include <util/stream/mem.h>
#include <util/system/unaligned_mem.h>
#include <util/ysaveload.h>
-
-#include "reader.h"
-#include "writer.h"
-
+
+#include "reader.h"
+#include "writer.h"
+
#include <cmath>
#include <cstddef>
template <typename T>
-class TYVector {
-private:
- ui32 Size;
- const T* Data;
-
-public:
- TYVector(const TBlob& blob)
+class TYVector {
+private:
+ ui32 Size;
+ const T* Data;
+
+public:
+ TYVector(const TBlob& blob)
: Size(IntegerCast<ui32>(ReadUnaligned<ui64>(blob.Data())))
, Data((const T*)((const char*)blob.Data() + sizeof(ui64)))
- {
- }
-
+ {
+ }
+
void Get(size_t idx, T& t) const {
assert(idx < (size_t)Size);
t = ReadUnaligned<T>(Data + idx);
}
- const T& At(size_t idx) const {
- assert(idx < (size_t)Size);
- return Data[idx];
- }
-
- size_t GetSize() const {
- return Size;
- }
-
+ const T& At(size_t idx) const {
+ assert(idx < (size_t)Size);
+ return Data[idx];
+ }
+
+ size_t GetSize() const {
+ return Size;
+ }
+
size_t RealSize() const {
return sizeof(ui64) + Size * sizeof(T);
}
~TYVector() = default;
-};
-
+};
+
template <typename T>
-class TYVectorWriter {
-private:
+class TYVectorWriter {
+private:
TVector<T> Vector;
-
-public:
+
+public:
TYVectorWriter() = default;
-
- void PushBack(const T& value) {
- Vector.push_back(value);
- }
-
+
+ void PushBack(const T& value) {
+ Vector.push_back(value);
+ }
+
void Save(IOutputStream& out) const {
- ui64 uSize = (ui64)Vector.size();
- out.Write(&uSize, sizeof(uSize));
+ ui64 uSize = (ui64)Vector.size();
+ out.Write(&uSize, sizeof(uSize));
out.Write(Vector.data(), Vector.size() * sizeof(T));
- }
-
- const T& At(size_t idx) const {
- assert(idx < Size());
- return Vector[idx];
- }
-
- T& At(size_t idx) {
- assert(idx < Size());
- return Vector[idx];
- }
-
- void Clear() {
- Vector.clear();
- }
-
- size_t Size() const {
- return Vector.size();
- }
-
- void Resize(size_t size) {
- Vector.resize(size);
- }
-
- void Resize(size_t size, const T& value) {
- Vector.resize(size, value);
- }
-};
-
+ }
+
+ const T& At(size_t idx) const {
+ assert(idx < Size());
+ return Vector[idx];
+ }
+
+ T& At(size_t idx) {
+ assert(idx < Size());
+ return Vector[idx];
+ }
+
+ void Clear() {
+ Vector.clear();
+ }
+
+ size_t Size() const {
+ return Vector.size();
+ }
+
+ void Resize(size_t size) {
+ Vector.resize(size);
+ }
+
+ void Resize(size_t size, const T& value) {
+ Vector.resize(size, value);
+ }
+};
+
template <typename T, bool>
-struct TYVectorG;
-
+struct TYVectorG;
+
template <typename X>
-struct TYVectorG<X, false> {
- typedef TYVector<X> T;
-};
-
+struct TYVectorG<X, false> {
+ typedef TYVector<X> T;
+};
+
template <typename X>
-struct TYVectorG<X, true> {
- typedef TYVectorWriter<X> T;
-};
-
+struct TYVectorG<X, true> {
+ typedef TYVectorWriter<X> T;
+};
+
template <typename T>
struct TIsMemsetThisWithZeroesSupported {
enum {
@@ -124,16 +124,16 @@ struct TIsMemsetThisWithZeroesSupported {
}; \
};
-class TPlainHashCommon {
-protected:
-#pragma pack(push, 8)
+class TPlainHashCommon {
+protected:
+#pragma pack(push, 8)
template <typename TKey, typename TValue>
- class TPackedPair {
- private:
- typedef TPackedPair<TKey, TValue> TThis;
- TKey Key;
- TValue Value;
-
+ class TPackedPair {
+ private:
+ typedef TPackedPair<TKey, TValue> TThis;
+ TKey Key;
+ TValue Value;
+
private:
static_assert(TIsMemsetThisWithZeroesSupported<TKey>::Result, "expect TIsMemsetThisWithZeroesSupported<TKey>::Result");
static_assert(TIsMemsetThisWithZeroesSupported<TValue>::Result, "expect TIsMemsetThisWithZeroesSupported<TValue>::Result");
@@ -145,33 +145,33 @@ protected:
Value = value;
}
- public:
+ public:
TPackedPair(typename TTypeTraits<TKey>::TFuncParam key, typename TTypeTraits<TValue>::TFuncParam value) {
Init(key, value);
- }
-
+ }
+
TPackedPair(const TThis& rhs) {
Init(rhs.Key, rhs.Value);
- }
-
- TPackedPair& operator=(const TThis& rhs) {
- if (this != &rhs) {
+ }
+
+ TPackedPair& operator=(const TThis& rhs) {
+ if (this != &rhs) {
Init(rhs.Key, rhs.Value);
- }
- return *this;
- }
-
+ }
+ return *this;
+ }
+
TPackedPair() {
Init(TKey(), TValue());
- }
+ }
- typename TTypeTraits<TKey>::TFuncParam First() const {
- return Key;
- }
-
- typename TTypeTraits<TValue>::TFuncParam Second() const {
- return Value;
- }
+ typename TTypeTraits<TKey>::TFuncParam First() const {
+ return Key;
+ }
+
+ typename TTypeTraits<TValue>::TFuncParam Second() const {
+ return Value;
+ }
static TKey GetFirst(const void* self) {
static constexpr size_t offset = offsetof(TThis, Key);
@@ -182,29 +182,29 @@ protected:
static constexpr size_t offset = offsetof(TThis, Value);
return ReadUnaligned<TValue>(reinterpret_cast<const char*>(self) + offset);
}
- };
-#pragma pack(pop)
-
-protected:
+ };
+#pragma pack(pop)
+
+protected:
static const ui16 VERSION_ID = 2;
-#pragma pack(push, 8)
- struct TInterval {
- static const ui32 INVALID = (ui32)-1;
- ui32 Offset;
- ui32 Length;
-
- TInterval()
- : Offset(INVALID)
- , Length(INVALID)
- {
- }
-
- TInterval(ui32 offset, ui32 length)
- : Offset(offset)
- , Length(length)
- {
- }
+#pragma pack(push, 8)
+ struct TInterval {
+ static const ui32 INVALID = (ui32)-1;
+ ui32 Offset;
+ ui32 Length;
+
+ TInterval()
+ : Offset(INVALID)
+ , Length(INVALID)
+ {
+ }
+
+ TInterval(ui32 offset, ui32 length)
+ : Offset(offset)
+ , Length(length)
+ {
+ }
static inline ui32 GetOffset(const TInterval* self) {
static constexpr size_t offset = offsetof(TInterval, Offset);
@@ -215,244 +215,244 @@ protected:
static constexpr size_t offset = offsetof(TInterval, Length);
return ReadUnaligned<ui32>(reinterpret_cast<const char*>(self) + offset);
}
- };
-#pragma pack(pop)
+ };
+#pragma pack(pop)
static_assert(8 == sizeof(TInterval), "expect 8 == sizeof(TInterval)");
template <typename TKey>
- static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) {
+ static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) {
Y_ASSERT(bits < 32);
- const ui32 res = ui32(key) & ((ui32(1) << bits) - 1);
+ const ui32 res = ui32(key) & ((ui32(1) << bits) - 1);
Y_ASSERT(res < (ui32(1) << bits));
- return res;
- }
-};
-
+ return res;
+ }
+};
+
template <typename TKey, typename TValue>
-class TPlainHashWriter : TPlainHashCommon {
-private:
- typedef TPackedPair<TKey, TValue> TKeyValuePair;
+class TPlainHashWriter : TPlainHashCommon {
+private:
+ typedef TPackedPair<TKey, TValue> TKeyValuePair;
typedef TVector<TKeyValuePair> TData;
- TData Data;
+ TData Data;
typedef TVector<TData> TData2;
-
- bool IsPlainEnought(ui16 bits) const {
+
+ bool IsPlainEnought(ui16 bits) const {
TVector<size_t> counts(1LL << bits, 0);
for (size_t i = 0; i < Data.size(); ++i) {
size_t& count = counts[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)];
- ++count;
- if (count > 2)
- return false;
- }
- return true;
- }
-
-public:
- void Add(const TKey& key, const TValue& value) {
+ ++count;
+ if (count > 2)
+ return false;
+ }
+ return true;
+ }
+
+public:
+ void Add(const TKey& key, const TValue& value) {
Data.push_back(TKeyValuePair(key, value));
- }
-
+ }
+
void Save(IOutputStream& out) const {
Y_ASSERT(Data.size() < Max<ui32>());
WriteBin<ui16>(&out, VERSION_ID);
- static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);
- WriteBin<ui32>(&out, PAIR_SIZE);
+ static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);
+ WriteBin<ui32>(&out, PAIR_SIZE);
- ui16 bits;
- if (!Data.empty()) {
+ ui16 bits;
+ if (!Data.empty()) {
bits = (ui16)(log((float)Data.size()) / log(2.f));
while ((bits < 22) && !IsPlainEnought(bits))
- ++bits;
- } else {
- bits = 0;
- }
- WriteBin<ui16>(&out, bits);
+ ++bits;
+ } else {
+ bits = 0;
+ }
+ WriteBin<ui16>(&out, bits);
WriteBin<ui32>(&out, (ui32)Data.size());
- const ui32 nBuckets = ui32(1) << bits;
- TData2 data2(nBuckets);
+ const ui32 nBuckets = ui32(1) << bits;
+ TData2 data2(nBuckets);
for (size_t i = 0; i < Data.size(); ++i)
data2[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)].push_back(Data[i]);
typedef TVector<TInterval> TIntervals;
- TIntervals intervals(nBuckets);
- ui32 offset = 0;
- for (ui32 i = 0; i < nBuckets; ++i) {
- intervals[i].Offset = offset;
+ TIntervals intervals(nBuckets);
+ ui32 offset = 0;
+ for (ui32 i = 0; i < nBuckets; ++i) {
+ intervals[i].Offset = offset;
intervals[i].Length = (ui32)data2[i].size();
offset += (ui32)data2[i].size();
- }
-#ifndef NDEBUG
- for (ui32 i = 0; i < nBuckets; ++i) {
- for (size_t j = 0; j < data2[i].size(); ++j)
- for (size_t k = j + 1; k < data2[i].size(); ++k)
+ }
+#ifndef NDEBUG
+ for (ui32 i = 0; i < nBuckets; ++i) {
+ for (size_t j = 0; j < data2[i].size(); ++j)
+ for (size_t k = j + 1; k < data2[i].size(); ++k)
if (TKeyValuePair::GetFirst(&data2[i][j]) == TKeyValuePair::GetFirst(&data2[i][k]))
ythrow yexception() << "key clash";
- }
-#endif
+ }
+#endif
out.Write(intervals.data(), intervals.size() * sizeof(intervals[0]));
- for (ui32 i = 0; i < nBuckets; ++i)
+ for (ui32 i = 0; i < nBuckets; ++i)
out.Write(data2[i].data(), data2[i].size() * sizeof(data2[i][0]));
- }
-};
-
+ }
+};
+
template <typename TKey, typename TValue>
-class TPlainHash : TPlainHashCommon {
-private:
- typedef TPackedPair<TKey, TValue> TKeyValuePair;
+class TPlainHash : TPlainHashCommon {
+private:
+ typedef TPackedPair<TKey, TValue> TKeyValuePair;
- const char* P;
+ const char* P;
- ui16 GetBits() const {
+ ui16 GetBits() const {
return ReadUnaligned<ui16>(P + 6);
- }
+ }
- ui32 GetSize() const {
+ ui32 GetSize() const {
return ReadUnaligned<ui32>(P + 8);
- }
+ }
+
+ const TInterval* GetIntervals() const {
+ return (const TInterval*)(P + 12);
+ }
- const TInterval* GetIntervals() const {
- return (const TInterval*)(P + 12);
- }
+ const TKeyValuePair* GetData() const {
+ return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits()));
+ }
- const TKeyValuePair* GetData() const {
- return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits()));
- }
-
template <typename T>
- void Init(const T* p) {
+ void Init(const T* p) {
static_assert(sizeof(T) == 1, "expect sizeof(T) == 1");
- P = reinterpret_cast<const char*>(p);
-#ifndef NDEBUG
+ P = reinterpret_cast<const char*>(p);
+#ifndef NDEBUG
ui16 version = ReadUnaligned<ui16>(p);
if (version != VERSION_ID)
- ythrow yexception() << "bad version: " << version;
- static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);
+ ythrow yexception() << "bad version: " << version;
+ static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);
const ui32 size = ReadUnaligned<ui32>(p + 2);
- if (size != PAIR_SIZE)
- ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE;
-#endif
- }
-
-public:
- typedef const TKeyValuePair* TConstIterator;
-
- TPlainHash(const char* p) {
- Init(p);
- }
-
- TPlainHash(const TBlob& blob) {
- Init(blob.Begin());
- }
-
- bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const {
- // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl;
- const ui32 hash = KeyHash<TKey>(key, GetBits());
+ if (size != PAIR_SIZE)
+ ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE;
+#endif
+ }
+
+public:
+ typedef const TKeyValuePair* TConstIterator;
+
+ TPlainHash(const char* p) {
+ Init(p);
+ }
+
+ TPlainHash(const TBlob& blob) {
+ Init(blob.Begin());
+ }
+
+ bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const {
+ // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl;
+ const ui32 hash = KeyHash<TKey>(key, GetBits());
const TInterval* intervalPtr = GetIntervals();
const TKeyValuePair* pair = GetData() + TInterval::GetOffset(intervalPtr + hash);
const ui32 length = TInterval::GetLength(intervalPtr + hash);
for (ui32 i = 0; i < length; ++i, ++pair) {
if (TKeyValuePair::GetFirst(pair) == key) {
*res = TKeyValuePair::GetSecond(pair);
- return true;
- }
- }
- return false;
- }
-
- TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const {
- TValue res;
- if (Find(key, &res))
- return res;
- else
- ythrow yexception() << "key not found";
- }
-
- TConstIterator Begin() const {
- return GetData();
- }
-
- TConstIterator End() const {
- return GetData() + GetSize();
- }
-
- const char* ByteEnd() const {
- return (const char*)(GetData() + GetSize());
- }
-
- size_t ByteSize() const {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const {
+ TValue res;
+ if (Find(key, &res))
+ return res;
+ else
+ ythrow yexception() << "key not found";
+ }
+
+ TConstIterator Begin() const {
+ return GetData();
+ }
+
+ TConstIterator End() const {
+ return GetData() + GetSize();
+ }
+
+ const char* ByteEnd() const {
+ return (const char*)(GetData() + GetSize());
+ }
+
+ size_t ByteSize() const {
return 12 + sizeof(TInterval) * (size_t(1) << GetBits()) + sizeof(TKeyValuePair) * GetSize();
- }
-};
-
+ }
+};
+
template <typename Key, typename Value, bool>
-struct TPlainHashG;
-
+struct TPlainHashG;
+
template <typename Key, typename Value>
-struct TPlainHashG<Key, Value, false> {
- typedef TPlainHash<Key, Value> T;
-};
-
+struct TPlainHashG<Key, Value, false> {
+ typedef TPlainHash<Key, Value> T;
+};
+
template <typename Key, typename Value>
-struct TPlainHashG<Key, Value, true> {
- typedef TPlainHashWriter<Key, Value> T;
-};
-
+struct TPlainHashG<Key, Value, true> {
+ typedef TPlainHashWriter<Key, Value> T;
+};
+
template <typename T>
-class TSingleValue {
-private:
- const T* Value;
-
-public:
- TSingleValue(const TBlob& blob) {
+class TSingleValue {
+private:
+ const T* Value;
+
+public:
+ TSingleValue(const TBlob& blob) {
Y_ASSERT(blob.Length() >= sizeof(T));
Y_ASSERT(blob.Length() <= sizeof(T) + 16);
- Value = reinterpret_cast<const T*>(blob.Begin());
- }
-
- const T& Get() const {
- return *Value;
- }
-};
-
+ Value = reinterpret_cast<const T*>(blob.Begin());
+ }
+
+ const T& Get() const {
+ return *Value;
+ }
+};
+
template <typename T>
-class TSingleValueWriter {
-private:
- T Value;
-
-public:
+class TSingleValueWriter {
+private:
+ T Value;
+
+public:
TSingleValueWriter() = default;
-
+
TSingleValueWriter(const T& value)
- : Value(value)
- {
- }
-
- void Set(const T& value) {
- Value = value;
- }
-
+ : Value(value)
+ {
+ }
+
+ void Set(const T& value) {
+ Value = value;
+ }
+
void Save(IOutputStream& out) const {
- out.Write(&Value, sizeof(Value));
- }
-};
-
-TBlob GetBlock(const TBlob& data, size_t index);
-
+ out.Write(&Value, sizeof(Value));
+ }
+};
+
+TBlob GetBlock(const TBlob& data, size_t index);
+
template <class T>
-void WriteBlock(TChunkedDataWriter& writer, const T& t) {
- writer.NewBlock();
- t.Save(writer);
-}
-
+void WriteBlock(TChunkedDataWriter& writer, const T& t) {
+ writer.NewBlock();
+ t.Save(writer);
+}
+
template <class T>
-void WriteBlock(TChunkedDataWriter& writer, T& t) {
- writer.NewBlock();
- t.Save(writer);
-}
-
+void WriteBlock(TChunkedDataWriter& writer, T& t) {
+ writer.NewBlock();
+ t.Save(writer);
+}
+
// Extends TChunkedDataWriter, allowing user to name blocks with arbitrary strings.
class TNamedChunkedDataWriter: public TChunkedDataWriter {
public: