diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/archive | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/archive')
-rw-r--r-- | library/cpp/archive/yarchive.cpp | 552 | ||||
-rw-r--r-- | library/cpp/archive/yarchive.h | 40 | ||||
-rw-r--r-- | library/cpp/archive/yarchive_ut.cpp | 88 |
3 files changed, 340 insertions, 340 deletions
diff --git a/library/cpp/archive/yarchive.cpp b/library/cpp/archive/yarchive.cpp index 1becc3e5da..7adefc714d 100644 --- a/library/cpp/archive/yarchive.cpp +++ b/library/cpp/archive/yarchive.cpp @@ -1,5 +1,5 @@ -#include "yarchive.h" - +#include "yarchive.h" + #include <util/generic/algorithm.h> #include <util/generic/hash.h> #include <util/generic/utility.h> @@ -14,145 +14,145 @@ #include <util/stream/zlib.h> #include <util/system/byteorder.h> #include <util/ysaveload.h> - -template <class T> + +template <class T> static inline void ESSave(IOutputStream* out, const T& t_in) { - T t = HostToLittle(t_in); - - out->Write((const void*)&t, sizeof(t)); -} - + T t = HostToLittle(t_in); + + out->Write((const void*)&t, sizeof(t)); +} + static inline void ESSave(IOutputStream* out, const TString& s) { ESSave(out, (ui32) s.size()); out->Write(s.data(), s.size()); -} - -template <class T> +} + +template <class T> static inline T ESLoad(IInputStream* in) { - T t = T(); - - if (in->Load(&t, sizeof(t)) != sizeof(t)) { + T t = T(); + + if (in->Load(&t, sizeof(t)) != sizeof(t)) { ythrow TSerializeException() << "malformed archive"; - } - - return LittleToHost(t); -} - -template <> + } + + return LittleToHost(t); +} + +template <> inline TString ESLoad<TString>(IInputStream* in) { - size_t len = ESLoad<ui32>(in); + size_t len = ESLoad<ui32>(in); TString ret; - TTempBuf tmp; - - while (len) { - const size_t toread = Min(len, tmp.Size()); - const size_t readed = in->Read(tmp.Data(), toread); - - if (!readed) { + TTempBuf tmp; + + while (len) { + const size_t toread = Min(len, tmp.Size()); + const size_t readed = in->Read(tmp.Data(), toread); + + if (!readed) { ythrow TSerializeException() << "malformed archive"; - } - - ret.append(tmp.Data(), readed); - len -= readed; - } - - return ret; -} - -namespace { - class TArchiveRecordDescriptor: public TSimpleRefCount<TArchiveRecordDescriptor> { - public: + } + + ret.append(tmp.Data(), readed); + len -= readed; + } + + return ret; +} + +namespace { + class TArchiveRecordDescriptor: public TSimpleRefCount<TArchiveRecordDescriptor> { + public: inline TArchiveRecordDescriptor(ui64 off, ui64 len, const TString& name) - : Off_(off) - , Len_(len) - , Name_(name) - { - } - + : Off_(off) + , Len_(len) + , Name_(name) + { + } + inline TArchiveRecordDescriptor(IInputStream* in) - : Off_(ESLoad<ui64>(in)) - , Len_(ESLoad<ui64>(in)) + : Off_(ESLoad<ui64>(in)) + , Len_(ESLoad<ui64>(in)) , Name_(ESLoad<TString>(in)) - { - } - + { + } + inline ~TArchiveRecordDescriptor() = default; - + inline void SaveTo(IOutputStream* out) const { - ESSave(out, Off_); - ESSave(out, Len_); - ESSave(out, Name_); - } - + ESSave(out, Off_); + ESSave(out, Len_); + ESSave(out, Name_); + } + inline const TString& Name() const noexcept { - return Name_; - } - + return Name_; + } + inline ui64 Length() const noexcept { - return Len_; - } - + return Len_; + } + inline ui64 Offset() const noexcept { - return Off_; - } - - private: - ui64 Off_; - ui64 Len_; + return Off_; + } + + private: + ui64 Off_; + ui64 Len_; TString Name_; - }; - - typedef TIntrusivePtr<TArchiveRecordDescriptor> TArchiveRecordDescriptorRef; -} - -class TArchiveWriter::TImpl { + }; + + typedef TIntrusivePtr<TArchiveRecordDescriptor> TArchiveRecordDescriptorRef; +} + +class TArchiveWriter::TImpl { using TDict = THashMap<TString, TArchiveRecordDescriptorRef>; - -public: + +public: inline TImpl(IOutputStream& out, bool compress) - : Off_(0) - , Out_(&out) - , UseCompression(compress) - { - } - + : Off_(0) + , Out_(&out) + , UseCompression(compress) + { + } + inline ~TImpl() = default; - - inline void Flush() { - Out_->Flush(); - } - - inline void Finish() { + + inline void Flush() { + Out_->Flush(); + } + + inline void Finish() { TCountingOutput out(Out_); - - { - TZLibCompress compress(&out); - - ESSave(&compress, (ui32)Dict_.size()); - + + { + TZLibCompress compress(&out); + + ESSave(&compress, (ui32)Dict_.size()); + for (const auto& kv : Dict_) { kv.second->SaveTo(&compress); - } - - ESSave(&compress, static_cast<ui8>(UseCompression)); - - compress.Finish(); - } - + } + + ESSave(&compress, static_cast<ui8>(UseCompression)); + + compress.Finish(); + } + ESSave(Out_, out.Counter()); - - Out_->Flush(); - } - + + Out_->Flush(); + } + inline void Add(const TString& key, IInputStream* src) { Y_ENSURE(!Dict_.contains(key), "key " << key.data() << " already stored"); - + TCountingOutput out(Out_); - if (UseCompression) { - TZLibCompress compress(&out); - TransferData(src, &compress); - compress.Finish(); - } else { + if (UseCompression) { + TZLibCompress compress(&out); + TransferData(src, &compress); + compress.Finish(); + } else { size_t skip_size = ArchiveWriterDefaultDataAlignment - Off_ % ArchiveWriterDefaultDataAlignment; if (skip_size == ArchiveWriterDefaultDataAlignment) { skip_size = 0; @@ -162,16 +162,16 @@ public: Off_ += 1; skip_size -= 1; } - TransferData(src, &out); - out.Finish(); - } - + TransferData(src, &out); + out.Finish(); + } + TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(Off_, out.Counter(), key)); - - Dict_[key] = descr; + + Dict_[key] = descr; Off_ += out.Counter(); - } - + } + inline void AddSynonym(const TString& existingKey, const TString& newKey) { Y_ENSURE(Dict_.contains(existingKey), "key " << existingKey.data() << " not stored yet"); Y_ENSURE(!Dict_.contains(newKey), "key " << newKey.data() << " already stored"); @@ -182,208 +182,208 @@ public: Dict_[newKey] = descr; } -private: - ui64 Off_; +private: + ui64 Off_; IOutputStream* Out_; - TDict Dict_; - const bool UseCompression; -}; - + TDict Dict_; + const bool UseCompression; +}; + TArchiveWriter::TArchiveWriter(IOutputStream* out, bool compress) : Impl_(new TImpl(*out, compress)) -{ -} - +{ +} + TArchiveWriter::~TArchiveWriter() { - try { - Finish(); - } catch (...) { - } -} - -void TArchiveWriter::Flush() { - if (Impl_.Get()) { - Impl_->Flush(); - } -} - -void TArchiveWriter::Finish() { - if (Impl_.Get()) { - Impl_->Finish(); - Impl_.Destroy(); - } -} - + try { + Finish(); + } catch (...) { + } +} + +void TArchiveWriter::Flush() { + if (Impl_.Get()) { + Impl_->Flush(); + } +} + +void TArchiveWriter::Finish() { + if (Impl_.Get()) { + Impl_->Finish(); + Impl_.Destroy(); + } +} + void TArchiveWriter::Add(const TString& key, IInputStream* src) { Y_ENSURE(Impl_.Get(), "archive already closed"); - - Impl_->Add(key, src); -} - + + Impl_->Add(key, src); +} + void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) { Y_ENSURE(Impl_.Get(), "archive already closed"); Impl_->AddSynonym(existingKey, newKey); } -namespace { - class TArchiveInputStreamBase { - public: - inline TArchiveInputStreamBase(const TBlob& b) - : Blob_(b) - , Input_(b.Data(), b.Size()) - { - } - - protected: - TBlob Blob_; - TMemoryInput Input_; - }; - - class TArchiveInputStream: public TArchiveInputStreamBase, public TZLibDecompress { - public: - inline TArchiveInputStream(const TBlob& b) - : TArchiveInputStreamBase(b) +namespace { + class TArchiveInputStreamBase { + public: + inline TArchiveInputStreamBase(const TBlob& b) + : Blob_(b) + , Input_(b.Data(), b.Size()) + { + } + + protected: + TBlob Blob_; + TMemoryInput Input_; + }; + + class TArchiveInputStream: public TArchiveInputStreamBase, public TZLibDecompress { + public: + inline TArchiveInputStream(const TBlob& b) + : TArchiveInputStreamBase(b) , TZLibDecompress(&Input_) - { - } - + { + } + ~TArchiveInputStream() override = default; - }; -} - -class TArchiveReader::TImpl { + }; +} + +class TArchiveReader::TImpl { typedef THashMap<TString, TArchiveRecordDescriptorRef> TDict; - -public: - inline TImpl(const TBlob& blob) - : Blob_(blob) - , UseDecompression(true) - { - ReadDict(); - } - + +public: + inline TImpl(const TBlob& blob) + : Blob_(blob) + , UseDecompression(true) + { + ReadDict(); + } + inline ~TImpl() = default; - - inline void ReadDict() { + + inline void ReadDict() { Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob"); - - const char* end = (const char*)Blob_.End(); - const char* ptr = end - sizeof(ui64); - ui64 dictlen = 0; - memcpy(&dictlen, ptr, sizeof(ui64)); - dictlen = LittleToHost(dictlen); - + + const char* end = (const char*)Blob_.End(); + const char* ptr = end - sizeof(ui64); + ui64 dictlen = 0; + memcpy(&dictlen, ptr, sizeof(ui64)); + dictlen = LittleToHost(dictlen); + Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob"); - - const char* beg = ptr - dictlen; - TMemoryInput mi(beg, dictlen); + + const char* beg = ptr - dictlen; + TMemoryInput mi(beg, dictlen); TZLibDecompress d(&mi); - const ui32 count = ESLoad<ui32>(&d); - - for (size_t i = 0; i < count; ++i) { - TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(&d)); - - Recs_.push_back(descr); - Dict_[descr->Name()] = descr; - } + const ui32 count = ESLoad<ui32>(&d); + + for (size_t i = 0; i < count; ++i) { + TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(&d)); + + Recs_.push_back(descr); + Dict_[descr->Name()] = descr; + } Sort(Recs_.begin(), Recs_.end(), [](const auto& lhs, const auto& rhs) -> bool { return lhs->Offset() < rhs->Offset(); }); - - try { - UseDecompression = static_cast<bool>(ESLoad<ui8>(&d)); - } catch (const TSerializeException&) { - // that's ok - just old format - UseDecompression = true; - } - } - + + try { + UseDecompression = static_cast<bool>(ESLoad<ui8>(&d)); + } catch (const TSerializeException&) { + // that's ok - just old format + UseDecompression = true; + } + } + inline size_t Count() const noexcept { - return Recs_.size(); - } - + return Recs_.size(); + } + inline TString KeyByIndex(size_t n) const { - if (n < Count()) { - return Recs_[n]->Name(); - } - - ythrow yexception() << "incorrect index"; - } + if (n < Count()) { + return Recs_[n]->Name(); + } + + ythrow yexception() << "incorrect index"; + } inline bool Has(const TStringBuf key) const { return Dict_.contains(key); - } + } inline TAutoPtr<IInputStream> ObjectByKey(const TStringBuf key) const { - TBlob subBlob = BlobByKey(key); - - if (UseDecompression) { - return new TArchiveInputStream(subBlob); - } else { - return new TMemoryInput(subBlob.Data(), subBlob.Length()); + TBlob subBlob = BlobByKey(key); + + if (UseDecompression) { + return new TArchiveInputStream(subBlob); + } else { + return new TMemoryInput(subBlob.Data(), subBlob.Length()); } - } + } inline TBlob ObjectBlobByKey(const TStringBuf key) const { - TBlob subBlob = BlobByKey(key); + TBlob subBlob = BlobByKey(key); - if (UseDecompression) { - TArchiveInputStream st(subBlob); - return TBlob::FromStream(st); - } else { - return subBlob; + if (UseDecompression) { + TArchiveInputStream st(subBlob); + return TBlob::FromStream(st); + } else { + return subBlob; } - } + } inline TBlob BlobByKey(const TStringBuf key) const { const auto it = Dict_.find(key); - + Y_ENSURE(it != Dict_.end(), "key " << key.data() << " not found"); - - const size_t off = it->second->Offset(); - const size_t len = it->second->Length(); - - /* - * TODO - overflow check - */ - - return Blob_.SubBlob(off, off + len); - } - - inline bool Compressed() const { - return UseDecompression; - } - -private: - TBlob Blob_; + + const size_t off = it->second->Offset(); + const size_t len = it->second->Length(); + + /* + * TODO - overflow check + */ + + return Blob_.SubBlob(off, off + len); + } + + inline bool Compressed() const { + return UseDecompression; + } + +private: + TBlob Blob_; TVector<TArchiveRecordDescriptorRef> Recs_; - TDict Dict_; - bool UseDecompression; -}; - -TArchiveReader::TArchiveReader(const TBlob& data) - : Impl_(new TImpl(data)) -{ -} - + TDict Dict_; + bool UseDecompression; +}; + +TArchiveReader::TArchiveReader(const TBlob& data) + : Impl_(new TImpl(data)) +{ +} + TArchiveReader::~TArchiveReader() {} - + size_t TArchiveReader::Count() const noexcept { - return Impl_->Count(); -} - + return Impl_->Count(); +} + TString TArchiveReader::KeyByIndex(size_t n) const { - return Impl_->KeyByIndex(n); -} - + return Impl_->KeyByIndex(n); +} + bool TArchiveReader::Has(const TStringBuf key) const { return Impl_->Has(key); } TAutoPtr<IInputStream> TArchiveReader::ObjectByKey(const TStringBuf key) const { - return Impl_->ObjectByKey(key); -} + return Impl_->ObjectByKey(key); +} TBlob TArchiveReader::ObjectBlobByKey(const TStringBuf key) const { return Impl_->ObjectBlobByKey(key); diff --git a/library/cpp/archive/yarchive.h b/library/cpp/archive/yarchive.h index 8120bcb940..a4e468ded2 100644 --- a/library/cpp/archive/yarchive.h +++ b/library/cpp/archive/yarchive.h @@ -1,10 +1,10 @@ #pragma once - + #include "models_archive_reader.h" #include <util/generic/fwd.h> #include <util/generic/ptr.h> - + class IInputStream; class IOutputStream; @@ -14,26 +14,26 @@ class TBlob; //noncompressed data will be stored with default alignment DEVTOOLS-4384 static constexpr size_t ArchiveWriterDefaultDataAlignment = 16; -class TArchiveWriter { -public: +class TArchiveWriter { +public: explicit TArchiveWriter(IOutputStream* out, bool compress = true); ~TArchiveWriter(); - - void Flush(); - void Finish(); + + void Flush(); + void Finish(); void Add(const TString& key, IInputStream* src); void AddSynonym(const TString& existingKey, const TString& newKey); - -private: - class TImpl; - THolder<TImpl> Impl_; -}; - + +private: + class TImpl; + THolder<TImpl> Impl_; +}; + class TArchiveReader : public IModelsArchiveReader { -public: +public: explicit TArchiveReader(const TBlob& data); ~TArchiveReader() override; - + size_t Count() const noexcept override; TString KeyByIndex(size_t n) const override; bool Has(TStringBuf key) const override; @@ -41,8 +41,8 @@ public: TBlob ObjectBlobByKey(TStringBuf key) const override; TBlob BlobByKey(TStringBuf key) const override; bool Compressed() const override; - -private: - class TImpl; - THolder<TImpl> Impl_; -}; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; diff --git a/library/cpp/archive/yarchive_ut.cpp b/library/cpp/archive/yarchive_ut.cpp index 602a1cdbbd..d4abdcd44c 100644 --- a/library/cpp/archive/yarchive_ut.cpp +++ b/library/cpp/archive/yarchive_ut.cpp @@ -1,67 +1,67 @@ -#include "yarchive.h" - +#include "yarchive.h" + #include <library/cpp/testing/unittest/registar.h> - -#include <util/string/cast.h> + +#include <util/string/cast.h> #include <util/stream/file.h> -#include <util/system/tempfile.h> +#include <util/system/tempfile.h> #include <util/memory/blob.h> - -class TArchiveTest: public TTestBase { - UNIT_TEST_SUITE(TArchiveTest) - UNIT_TEST(TestCreate); - UNIT_TEST(TestRead); + +class TArchiveTest: public TTestBase { + UNIT_TEST_SUITE(TArchiveTest) + UNIT_TEST(TestCreate); + UNIT_TEST(TestRead); UNIT_TEST(TestOffsetOrder); - UNIT_TEST_SUITE_END(); - -private: + UNIT_TEST_SUITE_END(); + +private: void CreateArchive(); - void TestCreate(); - void TestRead(); + void TestCreate(); + void TestRead(); void TestOffsetOrder(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TArchiveTest); - -#define ARCHIVE "./test.ar" - +}; + +UNIT_TEST_SUITE_REGISTRATION(TArchiveTest); + +#define ARCHIVE "./test.ar" + void TArchiveTest::CreateArchive() { TFixedBufferFileOutput out(ARCHIVE); - TArchiveWriter w(&out); - - for (size_t i = 0; i < 1000; ++i) { + TArchiveWriter w(&out); + + for (size_t i = 0; i < 1000; ++i) { const TString path = "/" + ToString(i); const TString data = "data" + ToString(i * 1000) + "dataend"; - TStringInput si(data); - - w.Add(path, &si); - } - - w.Finish(); - out.Finish(); -} - + TStringInput si(data); + + w.Add(path, &si); + } + + w.Finish(); + out.Finish(); +} + void TArchiveTest::TestCreate() { CreateArchive(); TTempFile tmpFile(ARCHIVE); } -void TArchiveTest::TestRead() { +void TArchiveTest::TestRead() { CreateArchive(); - TTempFile tmpFile(ARCHIVE); + TTempFile tmpFile(ARCHIVE); TBlob blob = TBlob::FromFileSingleThreaded(ARCHIVE); - TArchiveReader r(blob); - - UNIT_ASSERT_EQUAL(r.Count(), 1000); - - for (size_t i = 0; i < 1000; ++i) { + TArchiveReader r(blob); + + UNIT_ASSERT_EQUAL(r.Count(), 1000); + + for (size_t i = 0; i < 1000; ++i) { const TString key = "/" + ToString(i); TAutoPtr<IInputStream> is = r.ObjectByKey(key); const TString data = is->ReadAll(); - - UNIT_ASSERT_EQUAL(data, "data" + ToString(i * 1000) + "dataend"); - } -} + + UNIT_ASSERT_EQUAL(data, "data" + ToString(i * 1000) + "dataend"); + } +} void TArchiveTest::TestOffsetOrder() { CreateArchive(); |