diff options
| author | denplusplus <[email protected]> | 2022-02-10 16:47:34 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:47:34 +0300 | 
| commit | 57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (patch) | |
| tree | cc63639f8e502db19a82c20e2861c6d1edbf9fea /library/cpp/on_disk | |
| parent | 464ba3814a83db4f2d5327393b0b6eaf0c86bfd7 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/on_disk')
| -rw-r--r-- | library/cpp/on_disk/chunks/chunked_helpers.cpp | 22 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/chunked_helpers.h | 604 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/chunks_ut.cpp | 28 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/reader.cpp | 70 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/reader.h | 38 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/writer.cpp | 80 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/writer.h | 44 | ||||
| -rw-r--r-- | library/cpp/on_disk/chunks/ya.make | 18 | 
8 files changed, 452 insertions, 452 deletions
| diff --git a/library/cpp/on_disk/chunks/chunked_helpers.cpp b/library/cpp/on_disk/chunks/chunked_helpers.cpp index b7adba27535..ad9f918751c 100644 --- a/library/cpp/on_disk/chunks/chunked_helpers.cpp +++ b/library/cpp/on_disk/chunks/chunked_helpers.cpp @@ -1,21 +1,21 @@ -#include <util/ysaveload.h> - -#include "chunked_helpers.h" - -TBlob GetBlock(const TBlob& blob, size_t index) { -    TChunkedDataReader reader(blob); +#include <util/ysaveload.h>  +  +#include "chunked_helpers.h"  +  +TBlob GetBlock(const TBlob& blob, size_t index) {  +    TChunkedDataReader reader(blob);       if (index >= reader.GetBlocksCount())          ythrow yexception() << "index " << index << " is >= than block count " << reader.GetBlocksCount(); -    size_t begin = (const char*)reader.GetBlock(index) - (const char*)blob.Data(); -    return blob.SubBlob(begin, begin + reader.GetBlockLen(index)); -} +    size_t begin = (const char*)reader.GetBlock(index) - (const char*)blob.Data();  +    return blob.SubBlob(begin, begin + reader.GetBlockLen(index));  +}   /*************************** TNamedChunkedDataReader ***************************/  static const char* NamedChunkedDataMagic = "NamedChunkedData"; -TNamedChunkedDataReader::TNamedChunkedDataReader(const TBlob& blob) -    : TChunkedDataReader(blob) +TNamedChunkedDataReader::TNamedChunkedDataReader(const TBlob& blob)  +    : TChunkedDataReader(blob)   {      if (TChunkedDataReader::GetBlocksCount() < 1)          throw yexception() << "Too few blocks"; diff --git a/library/cpp/on_disk/chunks/chunked_helpers.h b/library/cpp/on_disk/chunks/chunked_helpers.h index 5fa96afdca0..fa7febf0803 100644 --- a/library/cpp/on_disk/chunks/chunked_helpers.h +++ b/library/cpp/on_disk/chunks/chunked_helpers.h @@ -1,114 +1,114 @@  #pragma once - +   #include <util/generic/vector.h> -#include <util/generic/buffer.h> +#include <util/generic/buffer.h>   #include <util/generic/hash_set.h> -#include <util/generic/cast.h> +#include <util/generic/cast.h>   #include <util/generic/ymath.h>  #include <util/memory/blob.h>  #include <util/stream/buffer.h> -#include <util/stream/mem.h> +#include <util/stream/mem.h>   #include <util/system/unaligned_mem.h>  #include <util/ysaveload.h> - -#include "reader.h" -#include "writer.h" - +  +#include "reader.h"  +#include "writer.h"  +   #include <cmath>  #include <cstddef>  template <typename T> -class TYVector { -private: -    ui32 Size; -    const T* Data; - -public: -    TYVector(const TBlob& blob) +class TYVector {  +private:  +    ui32 Size;  +    const T* Data;  +  +public:  +    TYVector(const TBlob& blob)           : Size(IntegerCast<ui32>(ReadUnaligned<ui64>(blob.Data())))          , Data((const T*)((const char*)blob.Data() + sizeof(ui64))) -    { -    } - +    {  +    }  +       void Get(size_t idx, T& t) const {          assert(idx < (size_t)Size);          t = ReadUnaligned<T>(Data + idx);      } -    const T& At(size_t idx) const { -        assert(idx < (size_t)Size); -        return Data[idx]; -    } - -    size_t GetSize() const { -        return Size; -    } - +    const T& At(size_t idx) const {  +        assert(idx < (size_t)Size);  +        return Data[idx];  +    }  +  +    size_t GetSize() const {  +        return Size;  +    }  +       size_t RealSize() const {          return sizeof(ui64) + Size * sizeof(T);      }      ~TYVector() = default; -}; - +};  +   template <typename T> -class TYVectorWriter { -private: +class TYVectorWriter {  +private:       TVector<T> Vector; - -public: +  +public:       TYVectorWriter() = default; - -    void PushBack(const T& value) { -        Vector.push_back(value); -    } - +  +    void PushBack(const T& value) {  +        Vector.push_back(value);  +    }  +       void Save(IOutputStream& out) const { -        ui64 uSize = (ui64)Vector.size(); -        out.Write(&uSize, sizeof(uSize)); +        ui64 uSize = (ui64)Vector.size();  +        out.Write(&uSize, sizeof(uSize));           out.Write(Vector.data(), Vector.size() * sizeof(T)); -    } - -    const T& At(size_t idx) const { -        assert(idx < Size()); -        return Vector[idx]; -    } - -    T& At(size_t idx) { -        assert(idx < Size()); -        return Vector[idx]; -    } - -    void Clear() { -        Vector.clear(); -    } - -    size_t Size() const { -        return Vector.size(); -    } - -    void Resize(size_t size) { -        Vector.resize(size); -    } - -    void Resize(size_t size, const T& value) { -        Vector.resize(size, value); -    } -}; - +    }  +  +    const T& At(size_t idx) const {  +        assert(idx < Size());  +        return Vector[idx];  +    }  +  +    T& At(size_t idx) {  +        assert(idx < Size());  +        return Vector[idx];  +    }  +  +    void Clear() {  +        Vector.clear();  +    }  +  +    size_t Size() const {  +        return Vector.size();  +    }  +  +    void Resize(size_t size) {  +        Vector.resize(size);  +    }  + +    void Resize(size_t size, const T& value) {  +        Vector.resize(size, value);  +    }  +};  +   template <typename T, bool> -struct TYVectorG; - +struct TYVectorG;  +   template <typename X> -struct TYVectorG<X, false> { -    typedef TYVector<X> T; -}; - +struct TYVectorG<X, false> {  +    typedef TYVector<X> T;  +};  +   template <typename X> -struct TYVectorG<X, true> { -    typedef TYVectorWriter<X> T; -}; - +struct TYVectorG<X, true> {  +    typedef TYVectorWriter<X> T;  +};  +   template <typename T>  struct TIsMemsetThisWithZeroesSupported {      enum { @@ -124,16 +124,16 @@ struct TIsMemsetThisWithZeroesSupported {          };                                          \      }; -class TPlainHashCommon { -protected: -#pragma pack(push, 8) +class TPlainHashCommon {  +protected:  +#pragma pack(push, 8)       template <typename TKey, typename TValue> -    class TPackedPair { -    private: -        typedef TPackedPair<TKey, TValue> TThis; -        TKey Key; -        TValue Value; - +    class TPackedPair {  +    private:  +        typedef TPackedPair<TKey, TValue> TThis;  +        TKey Key;  +        TValue Value;  +       private:          static_assert(TIsMemsetThisWithZeroesSupported<TKey>::Result, "expect TIsMemsetThisWithZeroesSupported<TKey>::Result");          static_assert(TIsMemsetThisWithZeroesSupported<TValue>::Result, "expect TIsMemsetThisWithZeroesSupported<TValue>::Result"); @@ -145,33 +145,33 @@ protected:              Value = value;          } -    public: +    public:           TPackedPair(typename TTypeTraits<TKey>::TFuncParam key, typename TTypeTraits<TValue>::TFuncParam value) {              Init(key, value); -        } - +        }  +           TPackedPair(const TThis& rhs) {              Init(rhs.Key, rhs.Value); -        } - -        TPackedPair& operator=(const TThis& rhs) { -            if (this != &rhs) { +        }  +  +        TPackedPair& operator=(const TThis& rhs) {  +            if (this != &rhs) {                   Init(rhs.Key, rhs.Value); -            } -            return *this; -        } - +            }  +            return *this;  +        }  +           TPackedPair() {              Init(TKey(), TValue()); -        } +        }  -        typename TTypeTraits<TKey>::TFuncParam First() const { -            return Key; -        } - -        typename TTypeTraits<TValue>::TFuncParam Second() const { -            return Value; -        } +        typename TTypeTraits<TKey>::TFuncParam First() const {  +            return Key;  +        }  +  +        typename TTypeTraits<TValue>::TFuncParam Second() const {  +            return Value;  +        }           static TKey GetFirst(const void* self) {              static constexpr size_t offset = offsetof(TThis, Key); @@ -182,29 +182,29 @@ protected:              static constexpr size_t offset = offsetof(TThis, Value);              return ReadUnaligned<TValue>(reinterpret_cast<const char*>(self) + offset);          } -    }; -#pragma pack(pop) - -protected: +    };  +#pragma pack(pop)  +  +protected:       static const ui16 VERSION_ID = 2; -#pragma pack(push, 8) -    struct TInterval { -        static const ui32 INVALID = (ui32)-1; -        ui32 Offset; -        ui32 Length; - -        TInterval() -            : Offset(INVALID) -            , Length(INVALID) -        { -        } - -        TInterval(ui32 offset, ui32 length) -            : Offset(offset) -            , Length(length) -        { -        } +#pragma pack(push, 8)  +    struct TInterval {  +        static const ui32 INVALID = (ui32)-1;  +        ui32 Offset;  +        ui32 Length;  +  +        TInterval()  +            : Offset(INVALID)  +            , Length(INVALID)  +        {  +        }  +  +        TInterval(ui32 offset, ui32 length)  +            : Offset(offset)  +            , Length(length)  +        {  +        }           static inline ui32 GetOffset(const TInterval* self) {              static constexpr size_t offset = offsetof(TInterval, Offset); @@ -215,244 +215,244 @@ protected:              static constexpr size_t offset = offsetof(TInterval, Length);              return ReadUnaligned<ui32>(reinterpret_cast<const char*>(self) + offset);          } -    }; -#pragma pack(pop) +    };  +#pragma pack(pop)       static_assert(8 == sizeof(TInterval), "expect 8 == sizeof(TInterval)");      template <typename TKey> -    static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) { +    static ui32 KeyHash(typename TTypeTraits<TKey>::TFuncParam key, ui16 bits) {           Y_ASSERT(bits < 32); -        const ui32 res = ui32(key) & ((ui32(1) << bits) - 1); +        const ui32 res = ui32(key) & ((ui32(1) << bits) - 1);           Y_ASSERT(res < (ui32(1) << bits)); -        return res; -    } -}; - +        return res;  +    }  +};  +   template <typename TKey, typename TValue> -class TPlainHashWriter : TPlainHashCommon { -private: -    typedef TPackedPair<TKey, TValue> TKeyValuePair; +class TPlainHashWriter : TPlainHashCommon {  +private:  +    typedef TPackedPair<TKey, TValue> TKeyValuePair;       typedef TVector<TKeyValuePair> TData; -    TData Data; +    TData Data;       typedef TVector<TData> TData2; - -    bool IsPlainEnought(ui16 bits) const { +  +    bool IsPlainEnought(ui16 bits) const {           TVector<size_t> counts(1LL << bits, 0);          for (size_t i = 0; i < Data.size(); ++i) {              size_t& count = counts[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)]; -            ++count; -            if (count > 2) -                return false; -        } -        return true; -    } - -public: -    void Add(const TKey& key, const TValue& value) { +            ++count;  +            if (count > 2)  +                return false;  +        }  +        return true;  +    }  +  +public:  +    void Add(const TKey& key, const TValue& value) {           Data.push_back(TKeyValuePair(key, value)); -    } - +    }  +       void Save(IOutputStream& out) const {          Y_ASSERT(Data.size() < Max<ui32>());          WriteBin<ui16>(&out, VERSION_ID); -        static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); -        WriteBin<ui32>(&out, PAIR_SIZE); +        static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);  +        WriteBin<ui32>(&out, PAIR_SIZE);  -        ui16 bits; -        if (!Data.empty()) { +        ui16 bits;  +        if (!Data.empty()) {               bits = (ui16)(log((float)Data.size()) / log(2.f));              while ((bits < 22) && !IsPlainEnought(bits)) -                ++bits; -        } else { -            bits = 0; -        } -        WriteBin<ui16>(&out, bits); +                ++bits;  +        } else {  +            bits = 0;  +        }  +        WriteBin<ui16>(&out, bits);           WriteBin<ui32>(&out, (ui32)Data.size()); -        const ui32 nBuckets = ui32(1) << bits; -        TData2 data2(nBuckets); +        const ui32 nBuckets = ui32(1) << bits;  +        TData2 data2(nBuckets);           for (size_t i = 0; i < Data.size(); ++i)              data2[KeyHash<TKey>(TKeyValuePair::GetFirst(&Data[i]), bits)].push_back(Data[i]);          typedef TVector<TInterval> TIntervals; -        TIntervals intervals(nBuckets); -        ui32 offset = 0; -        for (ui32 i = 0; i < nBuckets; ++i) { -            intervals[i].Offset = offset; +        TIntervals intervals(nBuckets);  +        ui32 offset = 0;  +        for (ui32 i = 0; i < nBuckets; ++i) {  +            intervals[i].Offset = offset;               intervals[i].Length = (ui32)data2[i].size();              offset += (ui32)data2[i].size(); -        } -#ifndef NDEBUG -        for (ui32 i = 0; i < nBuckets; ++i) { -            for (size_t j = 0; j < data2[i].size(); ++j) -                for (size_t k = j + 1; k < data2[i].size(); ++k) +        }  +#ifndef NDEBUG  +        for (ui32 i = 0; i < nBuckets; ++i) {  +            for (size_t j = 0; j < data2[i].size(); ++j)  +                for (size_t k = j + 1; k < data2[i].size(); ++k)                       if (TKeyValuePair::GetFirst(&data2[i][j]) == TKeyValuePair::GetFirst(&data2[i][k]))                          ythrow yexception() << "key clash"; -        } -#endif +        }  +#endif           out.Write(intervals.data(), intervals.size() * sizeof(intervals[0])); -        for (ui32 i = 0; i < nBuckets; ++i) +        for (ui32 i = 0; i < nBuckets; ++i)               out.Write(data2[i].data(), data2[i].size() * sizeof(data2[i][0])); -    } -}; - +    }  +};  +   template <typename TKey, typename TValue> -class TPlainHash : TPlainHashCommon { -private: -    typedef TPackedPair<TKey, TValue> TKeyValuePair; +class TPlainHash : TPlainHashCommon {  +private:  +    typedef TPackedPair<TKey, TValue> TKeyValuePair;  -    const char* P; +    const char* P;  -    ui16 GetBits() const { +    ui16 GetBits() const {           return ReadUnaligned<ui16>(P + 6); -    } +    }  -    ui32 GetSize() const { +    ui32 GetSize() const {           return ReadUnaligned<ui32>(P + 8); -    } - -    const TInterval* GetIntervals() const { -        return (const TInterval*)(P + 12); -    } +    }  -    const TKeyValuePair* GetData() const { -        return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits())); -    } +    const TInterval* GetIntervals() const {  +        return (const TInterval*)(P + 12);  +    }  +    const TKeyValuePair* GetData() const {  +        return (const TKeyValuePair*)(GetIntervals() + (1ULL << GetBits()));  +    }  +       template <typename T> -    void Init(const T* p) { +    void Init(const T* p) {           static_assert(sizeof(T) == 1, "expect sizeof(T) == 1"); -        P = reinterpret_cast<const char*>(p); -#ifndef NDEBUG +        P = reinterpret_cast<const char*>(p);  +#ifndef NDEBUG           ui16 version = ReadUnaligned<ui16>(p);          if (version != VERSION_ID) -            ythrow yexception() << "bad version: " << version; -        static const ui32 PAIR_SIZE = sizeof(TKeyValuePair); +            ythrow yexception() << "bad version: " << version;  +        static const ui32 PAIR_SIZE = sizeof(TKeyValuePair);           const ui32 size = ReadUnaligned<ui32>(p + 2); -        if (size != PAIR_SIZE) -            ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE; -#endif -    } - -public: -    typedef const TKeyValuePair* TConstIterator; - -    TPlainHash(const char* p) { -        Init(p); -    } - -    TPlainHash(const TBlob& blob) { -        Init(blob.Begin()); -    } - -    bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const { -        // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl; -        const ui32 hash = KeyHash<TKey>(key, GetBits()); +        if (size != PAIR_SIZE)  +            ythrow yexception() << "bad size " << size << " instead of " << PAIR_SIZE;  +#endif  +    }  +  +public:  +    typedef const TKeyValuePair* TConstIterator;  +  +    TPlainHash(const char* p) {  +        Init(p);  +    }  + +    TPlainHash(const TBlob& blob) {  +        Init(blob.Begin());  +    }  +  +    bool Find(typename TTypeTraits<TKey>::TFuncParam key, TValue* res) const {  +        // Cerr << GetBits() << "\t" << (1 << GetBits()) << "\t" << GetSize() << Endl;  +        const ui32 hash = KeyHash<TKey>(key, GetBits());           const TInterval* intervalPtr = GetIntervals();          const TKeyValuePair* pair = GetData() + TInterval::GetOffset(intervalPtr + hash);          const ui32 length = TInterval::GetLength(intervalPtr + hash);          for (ui32 i = 0; i < length; ++i, ++pair) {              if (TKeyValuePair::GetFirst(pair) == key) {                  *res = TKeyValuePair::GetSecond(pair); -                return true; -            } -        } -        return false; -    } - -    TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const { -        TValue res; -        if (Find(key, &res)) -            return res; -        else -            ythrow yexception() << "key not found"; -    } - -    TConstIterator Begin() const { -        return GetData(); -    } - -    TConstIterator End() const { -        return GetData() + GetSize(); -    } - -    const char* ByteEnd() const { -        return (const char*)(GetData() + GetSize()); -    } - -    size_t ByteSize() const { +                return true;  +            }  +        }  +        return false;  +    }  +  +    TValue Get(typename TTypeTraits<TKey>::TFuncParam key) const {  +        TValue res;  +        if (Find(key, &res))  +            return res;  +        else  +            ythrow yexception() << "key not found";  +    }  +  +    TConstIterator Begin() const {  +        return GetData();  +    }  +  +    TConstIterator End() const {  +        return GetData() + GetSize();  +    }  + +    const char* ByteEnd() const {  +        return (const char*)(GetData() + GetSize());  +    }  +  +    size_t ByteSize() const {           return 12 + sizeof(TInterval) * (size_t(1) << GetBits()) + sizeof(TKeyValuePair) * GetSize(); -    } -}; - +    }  +};  +   template <typename Key, typename Value, bool> -struct TPlainHashG; - +struct TPlainHashG;  +   template <typename Key, typename Value> -struct TPlainHashG<Key, Value, false> { -    typedef TPlainHash<Key, Value> T; -}; - +struct TPlainHashG<Key, Value, false> {  +    typedef TPlainHash<Key, Value> T;  +};  +   template <typename Key, typename Value> -struct TPlainHashG<Key, Value, true> { -    typedef TPlainHashWriter<Key, Value> T; -}; - +struct TPlainHashG<Key, Value, true> {  +    typedef TPlainHashWriter<Key, Value> T;  +};  +   template <typename T> -class TSingleValue { -private: -    const T* Value; - -public: -    TSingleValue(const TBlob& blob) { +class TSingleValue {  +private:  +    const T* Value;  +  +public:  +    TSingleValue(const TBlob& blob) {           Y_ASSERT(blob.Length() >= sizeof(T));          Y_ASSERT(blob.Length() <= sizeof(T) + 16); -        Value = reinterpret_cast<const T*>(blob.Begin()); -    } - -    const T& Get() const { -        return *Value; -    } -}; - +        Value = reinterpret_cast<const T*>(blob.Begin());  +    }  +  +    const T& Get() const {  +        return *Value;  +    }  +};  +   template <typename T> -class TSingleValueWriter { -private: -    T Value; - -public: +class TSingleValueWriter {  +private:  +    T Value;  +  +public:       TSingleValueWriter() = default; - +       TSingleValueWriter(const T& value) -        : Value(value) -    { -    } - -    void Set(const T& value) { -        Value = value; -    } - +        : Value(value)  +    {  +    }  +  +    void Set(const T& value) {  +        Value = value;  +    }  +       void Save(IOutputStream& out) const { -        out.Write(&Value, sizeof(Value)); -    } -}; - -TBlob GetBlock(const TBlob& data, size_t index); - +        out.Write(&Value, sizeof(Value));  +    }  +};  +  +TBlob GetBlock(const TBlob& data, size_t index);  +   template <class T> -void WriteBlock(TChunkedDataWriter& writer, const T& t) { -    writer.NewBlock(); -    t.Save(writer); -} - +void WriteBlock(TChunkedDataWriter& writer, const T& t) {  +    writer.NewBlock();  +    t.Save(writer);  +}  +   template <class T> -void WriteBlock(TChunkedDataWriter& writer, T& t) { -    writer.NewBlock(); -    t.Save(writer); -} - +void WriteBlock(TChunkedDataWriter& writer, T& t) {  +    writer.NewBlock();  +    t.Save(writer);  +}  +   // Extends TChunkedDataWriter, allowing user to name blocks with arbitrary strings.  class TNamedChunkedDataWriter: public TChunkedDataWriter {  public: diff --git a/library/cpp/on_disk/chunks/chunks_ut.cpp b/library/cpp/on_disk/chunks/chunks_ut.cpp index f727647f7f2..8ef8f812a52 100644 --- a/library/cpp/on_disk/chunks/chunks_ut.cpp +++ b/library/cpp/on_disk/chunks/chunks_ut.cpp @@ -1,11 +1,11 @@  #include <library/cpp/testing/unittest/registar.h> - +   #include <util/stream/file.h>  #include <util/system/filemap.h>  #include <util/system/tempfile.h> - -#include "chunked_helpers.h" - +  +#include "chunked_helpers.h"  +   /// Data for TChunkedHelpersTest::TestGeneralVector  struct TPodStruct {      int x; @@ -82,13 +82,13 @@ public:      void TestGeneralVector() {          { /// ui32              const size_t N = 3; -            TBufferStream stream; -            { +            TBufferStream stream;  +            {                   TGeneralVectorWriter<ui32> writer;                  for (size_t i = 0; i < N; ++i)                      writer.PushBack(i); -                writer.Save(stream); -            } +                writer.Save(stream);  +            }               {                  TBlob temp = TBlob::FromStreamSingleThreaded(stream);                  TGeneralVector<ui32> reader(temp); @@ -138,7 +138,7 @@ public:                  TBlob temp = TBlob::FromStreamSingleThreaded(stream);                  TGeneralVector<TItem> reader(temp);                  UNIT_ASSERT_EQUAL(reader.GetSize(), N); - +                   TItem value;                  reader.Get(0, value);                  UNIT_ASSERT(value.x == 1 && value.y == 2.0); @@ -154,14 +154,14 @@ public:              TVector<int> data_holder(N);              int* a = &(data_holder[0]);              TBufferStream stream; -            { +            {                   TGeneralVectorWriter<int*> writer;                  for (size_t i = 0; i < N; ++i) {                      a[i] = i;                      writer.PushBack(a + i);                  }                  writer.Save(stream); -            } +            }               {                  TBlob temp = TBlob::FromStreamSingleThreaded(stream);                  TGeneralVector<int*> reader(temp); @@ -174,7 +174,7 @@ public:                  }                  UNIT_ASSERT_EQUAL(reader.RealSize(), sizeof(ui64) + N * sizeof(int*));              } -        } +        }           { /// std::pair<int, int>              typedef std::pair<int, int> TItem;              const size_t N = 3; @@ -197,8 +197,8 @@ public:                  UNIT_ASSERT_EQUAL(reader.RealSize(), sizeof(ui64) + N * sizeof(TItem));              }          } -    } - +    }  +       void TestStrings() {          const TString FILENAME = "chunked_helpers_test.bin";          TTempFileHandle file(FILENAME.c_str()); diff --git a/library/cpp/on_disk/chunks/reader.cpp b/library/cpp/on_disk/chunks/reader.cpp index 6e28cbf367d..af4fef0ecfd 100644 --- a/library/cpp/on_disk/chunks/reader.cpp +++ b/library/cpp/on_disk/chunks/reader.cpp @@ -1,52 +1,52 @@  #include <util/generic/cast.h> -#include <util/memory/blob.h> +#include <util/memory/blob.h>   #include <util/system/unaligned_mem.h> - -#include "reader.h" - -template <typename T> +  +#include "reader.h"  +  +template <typename T>   static inline void ReadAux(const char* data, T* aux, T count, TVector<const char*>* result) { -    result->resize(count); -    for (size_t i = 0; i < count; ++i) { +    result->resize(count);  +    for (size_t i = 0; i < count; ++i) {           (*result)[i] = data + ReadUnaligned<T>(aux + i); -    } -} - +    }  +}  +   TChunkedDataReader::TChunkedDataReader(const TBlob& blob) { -    const char* cdata = blob.AsCharPtr(); -    const size_t size = blob.Size(); +    const char* cdata = blob.AsCharPtr();  +    const size_t size = blob.Size();       Y_ENSURE(size >= sizeof(ui32), "Empty file with chunks. ");      ui32 last = ReadUnaligned<ui32>((ui32*)(cdata + size) - 1); - -    if (last != 0) { // old version file -        ui32* aux = (ui32*)(cdata + size); -        ui32 count = last; -        Size = size - (count + 1) * sizeof(ui32); - -        aux -= (count + 1); -        ReadAux<ui32>(cdata, aux, count, &Offsets); -        return; -    } - +  +    if (last != 0) { // old version file  +        ui32* aux = (ui32*)(cdata + size);  +        ui32 count = last;  +        Size = size - (count + 1) * sizeof(ui32);  +  +        aux -= (count + 1);  +        ReadAux<ui32>(cdata, aux, count, &Offsets);  +        return;  +    }  +       Y_ENSURE(size >= 3 * sizeof(ui64), "Blob size must be >= 3 * sizeof(ui64). "); -    ui64* aux = (ui64*)(cdata + size); +    ui64* aux = (ui64*)(cdata + size);       Version = ReadUnaligned<ui64>(aux - 2);      Y_ENSURE(Version > 0, "Invalid chunked array version. "); - +       ui64 count = ReadUnaligned<ui64>(aux - 3); - -    aux -= (count + 3); -    ReadAux<ui64>(cdata, aux, count, &Offsets); - -    aux -= count; -    Lengths.resize(count); -    for (size_t i = 0; i < count; ++i) { +  +    aux -= (count + 3);  +    ReadAux<ui64>(cdata, aux, count, &Offsets);  +  +    aux -= count;  +    Lengths.resize(count);  +    for (size_t i = 0; i < count; ++i) {           Lengths[i] = IntegerCast<size_t>(ReadUnaligned<ui64>(aux + i)); -    } -} - +    }  +}  +   TBlob TChunkedDataReader::GetBlob(size_t index) const {      return TBlob::NoCopy(GetBlock(index), GetBlockLen(index));  } diff --git a/library/cpp/on_disk/chunks/reader.h b/library/cpp/on_disk/chunks/reader.h index c5fe783319f..66b01559953 100644 --- a/library/cpp/on_disk/chunks/reader.h +++ b/library/cpp/on_disk/chunks/reader.h @@ -1,31 +1,31 @@ -#pragma once - +#pragma once  +   #include <util/generic/array_ref.h> -#include <util/generic/vector.h> -#include <util/generic/yexception.h> - -class TBlob; - -class TChunkedDataReader { +#include <util/generic/vector.h>  +#include <util/generic/yexception.h>  +  +class TBlob;  +  +class TChunkedDataReader {   public:      TChunkedDataReader(const TBlob& blob); - +       inline const void* GetBlock(size_t index) const {          CheckIndex(index);          return Offsets[index];      } - +       inline size_t GetBlockLen(size_t index) const {          CheckIndex(index); - +           if (Version == 0) {              if (index + 1 < Offsets.size()) {                  return Offsets[index + 1] - Offsets[index]; -            } - +            }  +               return Size - (Offsets.back() - Offsets.front()); -        } - +        }  +           return Lengths[index];      } @@ -41,17 +41,17 @@ public:      inline size_t GetBlocksCount() const {          return Offsets.size();      } - +   private:      inline void CheckIndex(size_t index) const {          if (index >= GetBlocksCount()) {              ythrow yexception() << "requested block " << index << " of " << GetBlocksCount() << " blocks"; -        } +        }       } - +   private:      ui64 Version = 0;      TVector<const char*> Offsets;      TVector<size_t> Lengths;      size_t Size = 0; -}; +};  diff --git a/library/cpp/on_disk/chunks/writer.cpp b/library/cpp/on_disk/chunks/writer.cpp index 6dc7397f09a..3c7747de0bd 100644 --- a/library/cpp/on_disk/chunks/writer.cpp +++ b/library/cpp/on_disk/chunks/writer.cpp @@ -1,46 +1,46 @@ -#include <util/ysaveload.h> - -#include "writer.h" - +#include <util/ysaveload.h>  +  +#include "writer.h"  +   static inline void WriteAux(IOutputStream* out, const TVector<ui64>& data) {      ::SavePodArray(out, data.data(), data.size()); -} - -/*************************** TBuffersWriter ***************************/ - +}  +  +/*************************** TBuffersWriter ***************************/  +   TChunkedDataWriter::TChunkedDataWriter(IOutputStream& slave) -    : Slave(slave) -    , Offset(0) -{ -} - +    : Slave(slave)  +    , Offset(0)  +{  +}  +   TChunkedDataWriter::~TChunkedDataWriter() { -} - -void TChunkedDataWriter::NewBlock() { -    if (Offsets.size()) { -        Lengths.push_back(Offset - Offsets.back()); -    } - -    Pad(16); -    Offsets.push_back(Offset); -} - -void TChunkedDataWriter::WriteFooter() { -    Lengths.push_back(Offset - Offsets.back()); -    WriteAux(this, Lengths); -    WriteAux(this, Offsets); -    WriteBinary<ui64>(Offsets.size()); -    WriteBinary<ui64>(Version); -    WriteBinary<ui64>(0); -} - -size_t TChunkedDataWriter::GetCurrentBlockOffset() const { +}  +  +void TChunkedDataWriter::NewBlock() {  +    if (Offsets.size()) {  +        Lengths.push_back(Offset - Offsets.back());  +    }  +  +    Pad(16);  +    Offsets.push_back(Offset);  +}  +  +void TChunkedDataWriter::WriteFooter() {  +    Lengths.push_back(Offset - Offsets.back());  +    WriteAux(this, Lengths);  +    WriteAux(this, Offsets);  +    WriteBinary<ui64>(Offsets.size());  +    WriteBinary<ui64>(Version);  +    WriteBinary<ui64>(0);  +}  +  +size_t TChunkedDataWriter::GetCurrentBlockOffset() const {       Y_ASSERT(!Offsets.empty());      Y_ASSERT(Offset >= Offsets.back()); -    return Offset - Offsets.back(); -} - -size_t TChunkedDataWriter::GetBlockCount() const { -    return Offsets.size(); -} +    return Offset - Offsets.back();  +}  +  +size_t TChunkedDataWriter::GetBlockCount() const {  +    return Offsets.size();  +}  diff --git a/library/cpp/on_disk/chunks/writer.h b/library/cpp/on_disk/chunks/writer.h index ab14522bddd..ee0d7983c7f 100644 --- a/library/cpp/on_disk/chunks/writer.h +++ b/library/cpp/on_disk/chunks/writer.h @@ -1,57 +1,57 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/stream/output.h> - -template <typename T> +#pragma once  +  +#include <util/generic/vector.h>  +#include <util/stream/output.h>  +  +template <typename T>   inline void WriteBin(IOutputStream* out, typename TTypeTraits<T>::TFuncParam t) { -    out->Write(&t, sizeof(T)); -} - +    out->Write(&t, sizeof(T));  +}  +   class TChunkedDataWriter: public IOutputStream {  public:      TChunkedDataWriter(IOutputStream& slave);      ~TChunkedDataWriter() override; - +       void NewBlock(); - +       template <typename T>      inline void WriteBinary(typename TTypeTraits<T>::TFuncParam t) {          this->Write(&t, sizeof(T));      } - +       void WriteFooter();      size_t GetCurrentBlockOffset() const;      size_t GetBlockCount() const; - +   protected:      void DoWrite(const void* buf, size_t len) override {          Slave.Write(buf, len);          Offset += len;      } - +   private:      static inline size_t PaddingSize(size_t size, size_t boundary) noexcept {          const size_t boundaryViolation = size % boundary; - +           return boundaryViolation == 0 ? 0 : boundary - boundaryViolation;      } - +       inline void Pad(size_t boundary) {          const size_t newOffset = Offset + PaddingSize(Offset, boundary); - +           while (Offset < newOffset) {              Write('\0'); -        } +        }       } - +   private:      static const ui64 Version = 1; - +       IOutputStream& Slave; - +       size_t Offset;      TVector<ui64> Offsets;      TVector<ui64> Lengths; -}; +};  diff --git a/library/cpp/on_disk/chunks/ya.make b/library/cpp/on_disk/chunks/ya.make index acb52df5b0b..33d6f1e0580 100644 --- a/library/cpp/on_disk/chunks/ya.make +++ b/library/cpp/on_disk/chunks/ya.make @@ -1,11 +1,11 @@ -LIBRARY() - +LIBRARY()  +   OWNER(g:util) -SRCS( -    chunked_helpers.cpp -    reader.cpp -    writer.cpp -) - -END() +SRCS(  +    chunked_helpers.cpp  +    reader.cpp  +    writer.cpp  +)  +  +END()  | 
