diff options
author | denplusplus <denplusplus@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
commit | addb3626ed629a8c7d9c8c30e87365b478a8c266 (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/on_disk/chunks | |
parent | 57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (diff) | |
download | ydb-addb3626ed629a8c7d9c8c30e87365b478a8c266.tar.gz |
Restoring authorship annotation for <denplusplus@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/on_disk/chunks')
-rw-r--r-- | library/cpp/on_disk/chunks/chunked_helpers.cpp | 22 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/chunked_helpers.h | 604 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/chunks_ut.cpp | 28 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/reader.cpp | 70 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/reader.h | 38 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/writer.cpp | 80 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/writer.h | 44 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/ya.make | 18 |
8 files changed, 452 insertions, 452 deletions
diff --git a/library/cpp/on_disk/chunks/chunked_helpers.cpp b/library/cpp/on_disk/chunks/chunked_helpers.cpp index ad9f918751..b7adba2753 100644 --- a/library/cpp/on_disk/chunks/chunked_helpers.cpp +++ b/library/cpp/on_disk/chunks/chunked_helpers.cpp @@ -1,21 +1,21 @@ -#include <util/ysaveload.h> - -#include "chunked_helpers.h" - -TBlob GetBlock(const TBlob& blob, size_t index) { - TChunkedDataReader reader(blob); +#include <util/ysaveload.h> + +#include "chunked_helpers.h" + +TBlob GetBlock(const TBlob& blob, size_t index) { + TChunkedDataReader reader(blob); if (index >= reader.GetBlocksCount()) ythrow yexception() << "index " << index << " is >= than block count " << reader.GetBlocksCount(); - size_t begin = (const char*)reader.GetBlock(index) - (const char*)blob.Data(); - return blob.SubBlob(begin, begin + reader.GetBlockLen(index)); -} + size_t begin = (const char*)reader.GetBlock(index) - (const char*)blob.Data(); + return blob.SubBlob(begin, begin + reader.GetBlockLen(index)); +} /*************************** TNamedChunkedDataReader ***************************/ static const char* NamedChunkedDataMagic = "NamedChunkedData"; -TNamedChunkedDataReader::TNamedChunkedDataReader(const TBlob& blob) - : TChunkedDataReader(blob) +TNamedChunkedDataReader::TNamedChunkedDataReader(const TBlob& blob) + : TChunkedDataReader(blob) { if (TChunkedDataReader::GetBlocksCount() < 1) throw yexception() << "Too few blocks"; diff --git a/library/cpp/on_disk/chunks/chunked_helpers.h b/library/cpp/on_disk/chunks/chunked_helpers.h index fa7febf080..5fa96afdca 100644 --- a/library/cpp/on_disk/chunks/chunked_helpers.h +++ b/library/cpp/on_disk/chunks/chunked_helpers.h @@ -1,114 +1,114 @@ #pragma once - + #include <util/generic/vector.h> -#include <util/generic/buffer.h> +#include <util/generic/buffer.h> #include <util/generic/hash_set.h> -#include <util/generic/cast.h> +#include <util/generic/cast.h> #include <util/generic/ymath.h> #include <util/memory/blob.h> #include <util/stream/buffer.h> -#include <util/stream/mem.h> +#include <util/stream/mem.h> #include <util/system/unaligned_mem.h> #include <util/ysaveload.h> - -#include "reader.h" -#include "writer.h" - + +#include "reader.h" +#include "writer.h" + #include <cmath> #include <cstddef> template <typename T> -class TYVector { -private: - ui32 Size; - const T* Data; - -public: - TYVector(const TBlob& blob) +class TYVector { +private: + ui32 Size; + const T* Data; + +public: + TYVector(const TBlob& blob) : Size(IntegerCast<ui32>(ReadUnaligned<ui64>(blob.Data()))) , Data((const T*)((const char*)blob.Data() + sizeof(ui64))) - { - } - + { + } + void Get(size_t idx, T& t) const { assert(idx < (size_t)Size); t = ReadUnaligned<T>(Data + idx); } - const T& At(size_t idx) const { - assert(idx < (size_t)Size); - return Data[idx]; - } - - size_t GetSize() const { - return Size; - } - + const T& At(size_t idx) const { + assert(idx < (size_t)Size); + return Data[idx]; + } + + size_t GetSize() const { + return Size; + } + size_t RealSize() const { return sizeof(ui64) + Size * sizeof(T); } ~TYVector() = default; -}; - +}; + template <typename T> -class TYVectorWriter { -private: +class TYVectorWriter { +private: TVector<T> Vector; - -public: + +public: TYVectorWriter() = default; - - void PushBack(const T& value) { - Vector.push_back(value); - } - + + void PushBack(const T& value) { + Vector.push_back(value); + } + void Save(IOutputStream& out) const { - ui64 uSize = (ui64)Vector.size(); - out.Write(&uSize, sizeof(uSize)); + ui64 uSize = (ui64)Vector.size(); + out.Write(&uSize, sizeof(uSize)); out.Write(Vector.data(), Vector.size() * sizeof(T)); - } - - const T& At(size_t idx) const { - assert(idx < Size()); - return Vector[idx]; - } - - T& At(size_t idx) { - assert(idx < Size()); - return Vector[idx]; - } - - void Clear() { - Vector.clear(); - } - - size_t Size() const { - return Vector.size(); - } - - void Resize(size_t size) { - Vector.resize(size); - } - - void Resize(size_t size, const T& value) { - Vector.resize(size, value); - } -}; - + } + + const T& At(size_t idx) const { + assert(idx < Size()); + return Vector[idx]; + } + + T& At(size_t idx) { + assert(idx < Size()); + return Vector[idx]; + } + + void Clear() { + Vector.clear(); + } + + size_t Size() const { + return Vector.size(); + } + + void Resize(size_t size) { + Vector.resize(size); + } + + void Resize(size_t size, const T& value) { + Vector.resize(size, value); + } +}; + template <typename T, bool> -struct TYVectorG; - +struct TYVectorG; + template <typename X> -struct TYVectorG<X, false> { - typedef TYVector<X> T; -}; - +struct TYVectorG<X, false> { + typedef TYVector<X> T; +}; + template <typename X> -struct TYVectorG<X, true> { - typedef TYVectorWriter<X> T; -}; - +struct TYVectorG<X, true> { + typedef TYVectorWriter<X> T; +}; + template <typename T> struct TIsMemsetThisWithZeroesSupported { enum { @@ -124,16 +124,16 @@ struct TIsMemsetThisWithZeroesSupported { }; \ }; -class TPlainHashCommon { -protected: -#pragma pack(push, 8) +class TPlainHashCommon { +protected: +#pragma pack(push, 8) template <typename TKey, typename TValue> - class TPackedPair { - private: - typedef TPackedPair<TKey, TValue> TThis; - TKey Key; - TValue Value; - + class TPackedPair { + private: + typedef TPackedPair<TKey, TValue> TThis; + TKey Key; + TValue Value; + private: static_assert(TIsMemsetThisWithZeroesSupported<TKey>::Result, "expect TIsMemsetThisWithZeroesSupported<TKey>::Result"); static_assert(TIsMemsetThisWithZeroesSupported<TValue>::Result, "expect TIsMemsetThisWithZeroesSupported<TValue>::Result"); @@ -145,33 +145,33 @@ protected: Value = value; } - public: + public: TPackedPair(typename TTypeTraits<TKey>::TFuncParam key, typename TTypeTraits<TValue>::TFuncParam value) { Init(key, value); - } - + } + TPackedPair(const TThis& rhs) { Init(rhs.Key, rhs.Value); - } - - TPackedPair& operator=(const TThis& rhs) { - if (this != &rhs) { + } + + TPackedPair& operator=(const TThis& rhs) { + if (this != &rhs) { Init(rhs.Key, rhs.Value); - } - return *this; - } - + } + return *this; + } + TPackedPair() { Init(TKey(), TValue()); - } + } - typename TTypeTraits<TKey>::TFuncParam First() const { - return Key; - } - - typename TTypeTraits<TValue>::TFuncParam Second() const { - return Value; - } + typename TTypeTraits<TKey>::TFuncParam First() const { + return Key; + } + + typename TTypeTraits<TValue>::TFuncParam Second() const { + return Value; + } static TKey GetFirst(const void* self) { static constexpr size_t offset = offsetof(TThis, Key); @@ -182,29 +182,29 @@ protected: static constexpr size_t offset = offsetof(TThis, Value); return ReadUnaligned<TValue>(reinterpret_cast<const char*>(self) + offset); } - }; -#pragma pack(pop) - -protected: + }; +#pragma pack(pop) + +protected: static const ui16 VERSION_ID = 2; -#pragma pack(push, 8) - struct TInterval { - static const ui32 INVALID = (ui32)-1; - ui32 Offset; - ui32 Length; - - TInterval() - : Offset(INVALID) - , Length(INVALID) - { - } - - TInterval(ui32 offset, ui32 length) - : Offset(offset) - , Length(length) - { - } +#pragma pack(push, 8) + struct TInterval { + static const ui32 INVALID = (ui32)-1; + ui32 Offset; + ui32 Length; + + TInterval() + : Offset(INVALID) + , Length(INVALID) + { + } + + TInterval(ui32 offset, ui32 length) + : Offset(offset) + , Length(length) + { + } static inline ui32 GetOffset(const TInterval* self) { static constexpr size_t offset = offsetof(TInterval, Offset); @@ -215,244 +215,244 @@ protected: static constexpr size_t offset = offsetof(TInterval, Length); return ReadUnaligned<ui32>(reinterpret_cast<const char*>(self) + offset); } - }; -#pragma pack(pop) + }; +#pragma pack(pop) static_assert(8 == sizeof(TInterval), "expect 8 == sizeof(TInterval)"); template <typename TKey> - static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) { + static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) { Y_ASSERT(bits < 32); - const ui32 res = ui32(key) & ((ui32(1) << bits) - 1); + const ui32 res = ui32(key) & ((ui32(1) << bits) - 1); Y_ASSERT(res < (ui32(1) << bits)); - return res; - } -}; - + return res; + } +}; + template <typename TKey, typename TValue> -class TPlainHashWriter : TPlainHashCommon { -private: - typedef TPackedPair<TKey, TValue> TKeyValuePair; +class TPlainHashWriter : TPlainHashCommon { +private: + typedef TPackedPair<TKey, TValue> TKeyValuePair; typedef TVector<TKeyValuePair> TData; - TData Data; + TData Data; typedef TVector<TData> TData2; - - bool IsPlainEnought(ui16 bits) const { + + bool IsPlainEnought(ui16 bits) const { TVector<size_t> counts(1LL << bits, 0); for (size_t i = 0; i < Data.size(); ++i) { size_t& count = counts[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)]; - ++count; - if (count > 2) - return false; - } - return true; - } - -public: - void Add(const TKey& key, const TValue& value) { + ++count; + if (count > 2) + return false; + } + return true; + } + +public: + void Add(const TKey& key, const TValue& value) { Data.push_back(TKeyValuePair(key, value)); - } - + } + void Save(IOutputStream& out) const { Y_ASSERT(Data.size() < Max<ui32>()); WriteBin<ui16>(&out, VERSION_ID); - static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); - WriteBin<ui32>(&out, PAIR_SIZE); + static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); + WriteBin<ui32>(&out, PAIR_SIZE); - ui16 bits; - if (!Data.empty()) { + ui16 bits; + if (!Data.empty()) { bits = (ui16)(log((float)Data.size()) / log(2.f)); while ((bits < 22) && !IsPlainEnought(bits)) - ++bits; - } else { - bits = 0; - } - WriteBin<ui16>(&out, bits); + ++bits; + } else { + bits = 0; + } + WriteBin<ui16>(&out, bits); WriteBin<ui32>(&out, (ui32)Data.size()); - const ui32 nBuckets = ui32(1) << bits; - TData2 data2(nBuckets); + const ui32 nBuckets = ui32(1) << bits; + TData2 data2(nBuckets); for (size_t i = 0; i < Data.size(); ++i) data2[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)].push_back(Data[i]); typedef TVector<TInterval> TIntervals; - TIntervals intervals(nBuckets); - ui32 offset = 0; - for (ui32 i = 0; i < nBuckets; ++i) { - intervals[i].Offset = offset; + TIntervals intervals(nBuckets); + ui32 offset = 0; + for (ui32 i = 0; i < nBuckets; ++i) { + intervals[i].Offset = offset; intervals[i].Length = (ui32)data2[i].size(); offset += (ui32)data2[i].size(); - } -#ifndef NDEBUG - for (ui32 i = 0; i < nBuckets; ++i) { - for (size_t j = 0; j < data2[i].size(); ++j) - for (size_t k = j + 1; k < data2[i].size(); ++k) + } +#ifndef NDEBUG + for (ui32 i = 0; i < nBuckets; ++i) { + for (size_t j = 0; j < data2[i].size(); ++j) + for (size_t k = j + 1; k < data2[i].size(); ++k) if (TKeyValuePair::GetFirst(&data2[i][j]) == TKeyValuePair::GetFirst(&data2[i][k])) ythrow yexception() << "key clash"; - } -#endif + } +#endif out.Write(intervals.data(), intervals.size() * sizeof(intervals[0])); - for (ui32 i = 0; i < nBuckets; ++i) + for (ui32 i = 0; i < nBuckets; ++i) out.Write(data2[i].data(), data2[i].size() * sizeof(data2[i][0])); - } -}; - + } +}; + template <typename TKey, typename TValue> -class TPlainHash : TPlainHashCommon { -private: - typedef TPackedPair<TKey, TValue> TKeyValuePair; +class TPlainHash : TPlainHashCommon { +private: + typedef TPackedPair<TKey, TValue> TKeyValuePair; - const char* P; + const char* P; - ui16 GetBits() const { + ui16 GetBits() const { return ReadUnaligned<ui16>(P + 6); - } + } - ui32 GetSize() const { + ui32 GetSize() const { return ReadUnaligned<ui32>(P + 8); - } + } + + const TInterval* GetIntervals() const { + return (const TInterval*)(P + 12); + } - const TInterval* GetIntervals() const { - return (const TInterval*)(P + 12); - } + const TKeyValuePair* GetData() const { + return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits())); + } - const TKeyValuePair* GetData() const { - return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits())); - } - template <typename T> - void Init(const T* p) { + void Init(const T* p) { static_assert(sizeof(T) == 1, "expect sizeof(T) == 1"); - P = reinterpret_cast<const char*>(p); -#ifndef NDEBUG + P = reinterpret_cast<const char*>(p); +#ifndef NDEBUG ui16 version = ReadUnaligned<ui16>(p); if (version != VERSION_ID) - ythrow yexception() << "bad version: " << version; - static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); + ythrow yexception() << "bad version: " << version; + static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); const ui32 size = ReadUnaligned<ui32>(p + 2); - if (size != PAIR_SIZE) - ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE; -#endif - } - -public: - typedef const TKeyValuePair* TConstIterator; - - TPlainHash(const char* p) { - Init(p); - } - - TPlainHash(const TBlob& blob) { - Init(blob.Begin()); - } - - bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const { - // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl; - const ui32 hash = KeyHash<TKey>(key, GetBits()); + if (size != PAIR_SIZE) + ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE; +#endif + } + +public: + typedef const TKeyValuePair* TConstIterator; + + TPlainHash(const char* p) { + Init(p); + } + + TPlainHash(const TBlob& blob) { + Init(blob.Begin()); + } + + bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const { + // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl; + const ui32 hash = KeyHash<TKey>(key, GetBits()); const TInterval* intervalPtr = GetIntervals(); const TKeyValuePair* pair = GetData() + TInterval::GetOffset(intervalPtr + hash); const ui32 length = TInterval::GetLength(intervalPtr + hash); for (ui32 i = 0; i < length; ++i, ++pair) { if (TKeyValuePair::GetFirst(pair) == key) { *res = TKeyValuePair::GetSecond(pair); - return true; - } - } - return false; - } - - TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const { - TValue res; - if (Find(key, &res)) - return res; - else - ythrow yexception() << "key not found"; - } - - TConstIterator Begin() const { - return GetData(); - } - - TConstIterator End() const { - return GetData() + GetSize(); - } - - const char* ByteEnd() const { - return (const char*)(GetData() + GetSize()); - } - - size_t ByteSize() const { + return true; + } + } + return false; + } + + TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const { + TValue res; + if (Find(key, &res)) + return res; + else + ythrow yexception() << "key not found"; + } + + TConstIterator Begin() const { + return GetData(); + } + + TConstIterator End() const { + return GetData() + GetSize(); + } + + const char* ByteEnd() const { + return (const char*)(GetData() + GetSize()); + } + + size_t ByteSize() const { return 12 + sizeof(TInterval) * (size_t(1) << GetBits()) + sizeof(TKeyValuePair) * GetSize(); - } -}; - + } +}; + template <typename Key, typename Value, bool> -struct TPlainHashG; - +struct TPlainHashG; + template <typename Key, typename Value> -struct TPlainHashG<Key, Value, false> { - typedef TPlainHash<Key, Value> T; -}; - +struct TPlainHashG<Key, Value, false> { + typedef TPlainHash<Key, Value> T; +}; + template <typename Key, typename Value> -struct TPlainHashG<Key, Value, true> { - typedef TPlainHashWriter<Key, Value> T; -}; - +struct TPlainHashG<Key, Value, true> { + typedef TPlainHashWriter<Key, Value> T; +}; + template <typename T> -class TSingleValue { -private: - const T* Value; - -public: - TSingleValue(const TBlob& blob) { +class TSingleValue { +private: + const T* Value; + +public: + TSingleValue(const TBlob& blob) { Y_ASSERT(blob.Length() >= sizeof(T)); Y_ASSERT(blob.Length() <= sizeof(T) + 16); - Value = reinterpret_cast<const T*>(blob.Begin()); - } - - const T& Get() const { - return *Value; - } -}; - + Value = reinterpret_cast<const T*>(blob.Begin()); + } + + const T& Get() const { + return *Value; + } +}; + template <typename T> -class TSingleValueWriter { -private: - T Value; - -public: +class TSingleValueWriter { +private: + T Value; + +public: TSingleValueWriter() = default; - + TSingleValueWriter(const T& value) - : Value(value) - { - } - - void Set(const T& value) { - Value = value; - } - + : Value(value) + { + } + + void Set(const T& value) { + Value = value; + } + void Save(IOutputStream& out) const { - out.Write(&Value, sizeof(Value)); - } -}; - -TBlob GetBlock(const TBlob& data, size_t index); - + out.Write(&Value, sizeof(Value)); + } +}; + +TBlob GetBlock(const TBlob& data, size_t index); + template <class T> -void WriteBlock(TChunkedDataWriter& writer, const T& t) { - writer.NewBlock(); - t.Save(writer); -} - +void WriteBlock(TChunkedDataWriter& writer, const T& t) { + writer.NewBlock(); + t.Save(writer); +} + template <class T> -void WriteBlock(TChunkedDataWriter& writer, T& t) { - writer.NewBlock(); - t.Save(writer); -} - +void WriteBlock(TChunkedDataWriter& writer, T& t) { + writer.NewBlock(); + t.Save(writer); +} + // Extends TChunkedDataWriter, allowing user to name blocks with arbitrary strings. class TNamedChunkedDataWriter: public TChunkedDataWriter { public: diff --git a/library/cpp/on_disk/chunks/chunks_ut.cpp b/library/cpp/on_disk/chunks/chunks_ut.cpp index 8ef8f812a5..f727647f7f 100644 --- a/library/cpp/on_disk/chunks/chunks_ut.cpp +++ b/library/cpp/on_disk/chunks/chunks_ut.cpp @@ -1,11 +1,11 @@ #include <library/cpp/testing/unittest/registar.h> - + #include <util/stream/file.h> #include <util/system/filemap.h> #include <util/system/tempfile.h> - -#include "chunked_helpers.h" - + +#include "chunked_helpers.h" + /// Data for TChunkedHelpersTest::TestGeneralVector struct TPodStruct { int x; @@ -82,13 +82,13 @@ public: void TestGeneralVector() { { /// ui32 const size_t N = 3; - TBufferStream stream; - { + TBufferStream stream; + { TGeneralVectorWriter<ui32> writer; for (size_t i = 0; i < N; ++i) writer.PushBack(i); - writer.Save(stream); - } + writer.Save(stream); + } { TBlob temp = TBlob::FromStreamSingleThreaded(stream); TGeneralVector<ui32> reader(temp); @@ -138,7 +138,7 @@ public: TBlob temp = TBlob::FromStreamSingleThreaded(stream); TGeneralVector<TItem> reader(temp); UNIT_ASSERT_EQUAL(reader.GetSize(), N); - + TItem value; reader.Get(0, value); UNIT_ASSERT(value.x == 1 && value.y == 2.0); @@ -154,14 +154,14 @@ public: TVector<int> data_holder(N); int* a = &(data_holder[0]); TBufferStream stream; - { + { TGeneralVectorWriter<int*> writer; for (size_t i = 0; i < N; ++i) { a[i] = i; writer.PushBack(a + i); } writer.Save(stream); - } + } { TBlob temp = TBlob::FromStreamSingleThreaded(stream); TGeneralVector<int*> reader(temp); @@ -174,7 +174,7 @@ public: } UNIT_ASSERT_EQUAL(reader.RealSize(), sizeof(ui64) + N * sizeof(int*)); } - } + } { /// std::pair<int, int> typedef std::pair<int, int> TItem; const size_t N = 3; @@ -197,8 +197,8 @@ public: UNIT_ASSERT_EQUAL(reader.RealSize(), sizeof(ui64) + N * sizeof(TItem)); } } - } - + } + void TestStrings() { const TString FILENAME = "chunked_helpers_test.bin"; TTempFileHandle file(FILENAME.c_str()); diff --git a/library/cpp/on_disk/chunks/reader.cpp b/library/cpp/on_disk/chunks/reader.cpp index af4fef0ecf..6e28cbf367 100644 --- a/library/cpp/on_disk/chunks/reader.cpp +++ b/library/cpp/on_disk/chunks/reader.cpp @@ -1,52 +1,52 @@ #include <util/generic/cast.h> -#include <util/memory/blob.h> +#include <util/memory/blob.h> #include <util/system/unaligned_mem.h> - -#include "reader.h" - -template <typename T> + +#include "reader.h" + +template <typename T> static inline void ReadAux(const char* data, T* aux, T count, TVector<const char*>* result) { - result->resize(count); - for (size_t i = 0; i < count; ++i) { + result->resize(count); + for (size_t i = 0; i < count; ++i) { (*result)[i] = data + ReadUnaligned<T>(aux + i); - } -} - + } +} + TChunkedDataReader::TChunkedDataReader(const TBlob& blob) { - const char* cdata = blob.AsCharPtr(); - const size_t size = blob.Size(); + const char* cdata = blob.AsCharPtr(); + const size_t size = blob.Size(); Y_ENSURE(size >= sizeof(ui32), "Empty file with chunks. "); ui32 last = ReadUnaligned<ui32>((ui32*)(cdata + size) - 1); - - if (last != 0) { // old version file - ui32* aux = (ui32*)(cdata + size); - ui32 count = last; - Size = size - (count + 1) * sizeof(ui32); - - aux -= (count + 1); - ReadAux<ui32>(cdata, aux, count, &Offsets); - return; - } - + + if (last != 0) { // old version file + ui32* aux = (ui32*)(cdata + size); + ui32 count = last; + Size = size - (count + 1) * sizeof(ui32); + + aux -= (count + 1); + ReadAux<ui32>(cdata, aux, count, &Offsets); + return; + } + Y_ENSURE(size >= 3 * sizeof(ui64), "Blob size must be >= 3 * sizeof(ui64). "); - ui64* aux = (ui64*)(cdata + size); + ui64* aux = (ui64*)(cdata + size); Version = ReadUnaligned<ui64>(aux - 2); Y_ENSURE(Version > 0, "Invalid chunked array version. "); - + ui64 count = ReadUnaligned<ui64>(aux - 3); - - aux -= (count + 3); - ReadAux<ui64>(cdata, aux, count, &Offsets); - - aux -= count; - Lengths.resize(count); - for (size_t i = 0; i < count; ++i) { + + aux -= (count + 3); + ReadAux<ui64>(cdata, aux, count, &Offsets); + + aux -= count; + Lengths.resize(count); + for (size_t i = 0; i < count; ++i) { Lengths[i] = IntegerCast<size_t>(ReadUnaligned<ui64>(aux + i)); - } -} - + } +} + TBlob TChunkedDataReader::GetBlob(size_t index) const { return TBlob::NoCopy(GetBlock(index), GetBlockLen(index)); } diff --git a/library/cpp/on_disk/chunks/reader.h b/library/cpp/on_disk/chunks/reader.h index 66b0155995..c5fe783319 100644 --- a/library/cpp/on_disk/chunks/reader.h +++ b/library/cpp/on_disk/chunks/reader.h @@ -1,31 +1,31 @@ -#pragma once - +#pragma once + #include <util/generic/array_ref.h> -#include <util/generic/vector.h> -#include <util/generic/yexception.h> - -class TBlob; - -class TChunkedDataReader { +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +class TBlob; + +class TChunkedDataReader { public: TChunkedDataReader(const TBlob& blob); - + inline const void* GetBlock(size_t index) const { CheckIndex(index); return Offsets[index]; } - + inline size_t GetBlockLen(size_t index) const { CheckIndex(index); - + if (Version == 0) { if (index + 1 < Offsets.size()) { return Offsets[index + 1] - Offsets[index]; - } - + } + return Size - (Offsets.back() - Offsets.front()); - } - + } + return Lengths[index]; } @@ -41,17 +41,17 @@ public: inline size_t GetBlocksCount() const { return Offsets.size(); } - + private: inline void CheckIndex(size_t index) const { if (index >= GetBlocksCount()) { ythrow yexception() << "requested block " << index << " of " << GetBlocksCount() << " blocks"; - } + } } - + private: ui64 Version = 0; TVector<const char*> Offsets; TVector<size_t> Lengths; size_t Size = 0; -}; +}; diff --git a/library/cpp/on_disk/chunks/writer.cpp b/library/cpp/on_disk/chunks/writer.cpp index 3c7747de0b..6dc7397f09 100644 --- a/library/cpp/on_disk/chunks/writer.cpp +++ b/library/cpp/on_disk/chunks/writer.cpp @@ -1,46 +1,46 @@ -#include <util/ysaveload.h> - -#include "writer.h" - +#include <util/ysaveload.h> + +#include "writer.h" + static inline void WriteAux(IOutputStream* out, const TVector<ui64>& data) { ::SavePodArray(out, data.data(), data.size()); -} - -/*************************** TBuffersWriter ***************************/ - +} + +/*************************** TBuffersWriter ***************************/ + TChunkedDataWriter::TChunkedDataWriter(IOutputStream& slave) - : Slave(slave) - , Offset(0) -{ -} - + : Slave(slave) + , Offset(0) +{ +} + TChunkedDataWriter::~TChunkedDataWriter() { -} - -void TChunkedDataWriter::NewBlock() { - if (Offsets.size()) { - Lengths.push_back(Offset - Offsets.back()); - } - - Pad(16); - Offsets.push_back(Offset); -} - -void TChunkedDataWriter::WriteFooter() { - Lengths.push_back(Offset - Offsets.back()); - WriteAux(this, Lengths); - WriteAux(this, Offsets); - WriteBinary<ui64>(Offsets.size()); - WriteBinary<ui64>(Version); - WriteBinary<ui64>(0); -} - -size_t TChunkedDataWriter::GetCurrentBlockOffset() const { +} + +void TChunkedDataWriter::NewBlock() { + if (Offsets.size()) { + Lengths.push_back(Offset - Offsets.back()); + } + + Pad(16); + Offsets.push_back(Offset); +} + +void TChunkedDataWriter::WriteFooter() { + Lengths.push_back(Offset - Offsets.back()); + WriteAux(this, Lengths); + WriteAux(this, Offsets); + WriteBinary<ui64>(Offsets.size()); + WriteBinary<ui64>(Version); + WriteBinary<ui64>(0); +} + +size_t TChunkedDataWriter::GetCurrentBlockOffset() const { Y_ASSERT(!Offsets.empty()); Y_ASSERT(Offset >= Offsets.back()); - return Offset - Offsets.back(); -} - -size_t TChunkedDataWriter::GetBlockCount() const { - return Offsets.size(); -} + return Offset - Offsets.back(); +} + +size_t TChunkedDataWriter::GetBlockCount() const { + return Offsets.size(); +} diff --git a/library/cpp/on_disk/chunks/writer.h b/library/cpp/on_disk/chunks/writer.h index ee0d7983c7..ab14522bdd 100644 --- a/library/cpp/on_disk/chunks/writer.h +++ b/library/cpp/on_disk/chunks/writer.h @@ -1,57 +1,57 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/stream/output.h> - -template <typename T> +#pragma once + +#include <util/generic/vector.h> +#include <util/stream/output.h> + +template <typename T> inline void WriteBin(IOutputStream* out, typename TTypeTraits<T>::TFuncParam t) { - out->Write(&t, sizeof(T)); -} - + out->Write(&t, sizeof(T)); +} + class TChunkedDataWriter: public IOutputStream { public: TChunkedDataWriter(IOutputStream& slave); ~TChunkedDataWriter() override; - + void NewBlock(); - + template <typename T> inline void WriteBinary(typename TTypeTraits<T>::TFuncParam t) { this->Write(&t, sizeof(T)); } - + void WriteFooter(); size_t GetCurrentBlockOffset() const; size_t GetBlockCount() const; - + protected: void DoWrite(const void* buf, size_t len) override { Slave.Write(buf, len); Offset += len; } - + private: static inline size_t PaddingSize(size_t size, size_t boundary) noexcept { const size_t boundaryViolation = size % boundary; - + return boundaryViolation == 0 ? 0 : boundary - boundaryViolation; } - + inline void Pad(size_t boundary) { const size_t newOffset = Offset + PaddingSize(Offset, boundary); - + while (Offset < newOffset) { Write('\0'); - } + } } - + private: static const ui64 Version = 1; - + IOutputStream& Slave; - + size_t Offset; TVector<ui64> Offsets; TVector<ui64> Lengths; -}; +}; diff --git a/library/cpp/on_disk/chunks/ya.make b/library/cpp/on_disk/chunks/ya.make index 33d6f1e058..acb52df5b0 100644 --- a/library/cpp/on_disk/chunks/ya.make +++ b/library/cpp/on_disk/chunks/ya.make @@ -1,11 +1,11 @@ -LIBRARY() - +LIBRARY() + OWNER(g:util) -SRCS( - chunked_helpers.cpp - reader.cpp - writer.cpp -) - -END() +SRCS( + chunked_helpers.cpp + reader.cpp + writer.cpp +) + +END() |