aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/archive
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/archive
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/archive')
-rw-r--r--library/cpp/archive/directory_models_archive_reader.cpp115
-rw-r--r--library/cpp/archive/directory_models_archive_reader.h38
-rw-r--r--library/cpp/archive/directory_models_archive_reader_ut.cpp55
-rw-r--r--library/cpp/archive/models_archive_reader.h20
-rw-r--r--library/cpp/archive/ut/ya.make16
-rw-r--r--library/cpp/archive/ya.make12
-rw-r--r--library/cpp/archive/yarchive.cpp398
-rw-r--r--library/cpp/archive/yarchive.h48
-rw-r--r--library/cpp/archive/yarchive_ut.cpp84
9 files changed, 786 insertions, 0 deletions
diff --git a/library/cpp/archive/directory_models_archive_reader.cpp b/library/cpp/archive/directory_models_archive_reader.cpp
new file mode 100644
index 00000000000..6de9424c7c2
--- /dev/null
+++ b/library/cpp/archive/directory_models_archive_reader.cpp
@@ -0,0 +1,115 @@
+#include "directory_models_archive_reader.h"
+#include "yarchive.h"
+
+#include <util/folder/dirut.h>
+#include <util/folder/filelist.h>
+#include <util/folder/path.h>
+#include <util/memory/blob.h>
+#include <util/stream/file.h>
+#include <util/stream/input.h>
+#include <util/stream/mem.h>
+
+TDirectoryModelsArchiveReader::TDirectoryModelsArchiveReader(const TString& path, bool lockMemory, bool ownBlobs)
+ : Path_(path)
+{
+ Y_ENSURE(IsDir(path), "directory not found on this path");
+
+ LoadFilesAndSubdirs("", lockMemory, ownBlobs);
+}
+
+TDirectoryModelsArchiveReader::~TDirectoryModelsArchiveReader() {}
+
+size_t TDirectoryModelsArchiveReader::Count() const noexcept {
+ return Recs_.size();
+}
+
+TString TDirectoryModelsArchiveReader::KeyByIndex(size_t n) const {
+ Y_ENSURE(n < Count(), "incorrect index " << n);
+ return Recs_[n];
+}
+
+bool TDirectoryModelsArchiveReader::Has(const TStringBuf key) const {
+ return BlobByKey_.contains(key) || PathByKey_.contains(key);
+}
+
+namespace {
+ struct TBlobOwningStream : public TMemoryInput {
+ TBlob Blob;
+ TBlobOwningStream(TBlob blob)
+ : TMemoryInput(blob.Data(), blob.Length())
+ , Blob(blob)
+ {}
+ };
+}
+
+TAutoPtr<IInputStream> TDirectoryModelsArchiveReader::ObjectByKey(const TStringBuf key) const {
+ return new TBlobOwningStream(BlobByKey(key));
+}
+
+TBlob TDirectoryModelsArchiveReader::ObjectBlobByKey(const TStringBuf key) const {
+ return BlobByKey(key);
+}
+
+TBlob TDirectoryModelsArchiveReader::BlobByKey(const TStringBuf key) const {
+ Y_ENSURE(Has(key), "key " << key << " not found");
+ if (auto ptr = BlobByKey_.FindPtr(key); ptr) {
+ return *ptr;
+ }
+ if (auto ptr = PathByKey_.FindPtr(key); ptr) {
+ return TBlob::FromFile(*ptr);
+ }
+ Y_UNREACHABLE();
+}
+
+bool TDirectoryModelsArchiveReader::Compressed() const {
+ return false;
+}
+
+TString TDirectoryModelsArchiveReader::NormalizePath(TString path) const {
+ path = "/" + path;
+ for (size_t i = 0; i < path.size(); i++) {
+ if (path[i] == '\\')
+ path[i] = '/';
+ }
+ return path;
+}
+
+void TDirectoryModelsArchiveReader::LoadFilesAndSubdirs(const TString& subPath, bool lockMemory, bool ownBlobs) {
+ TFileList fileList;
+ fileList.Fill(JoinFsPaths(Path_, subPath));
+ const char* file;
+ while ((file = fileList.Next()) != nullptr) {
+ TString key = JoinFsPaths(subPath, TString(file));
+ TString fullPath = JoinFsPaths(Path_, key);
+ TBlob fileBlob;
+ if (lockMemory) {
+ fileBlob = TBlob::LockedFromFile(fullPath);
+ } else {
+ fileBlob = TBlob::FromFile(fullPath);
+ }
+ if (key.EndsWith(".archive")) {
+ TArchiveReader reader(fileBlob);
+ for (size_t i = 0, iEnd = reader.Count(); i < iEnd; ++i) {
+ const TString archiveKey = reader.KeyByIndex(i);
+ const TString normalizedPath = NormalizePath(JoinFsPaths(subPath, archiveKey.substr(1)));
+ BlobByKey_.emplace(normalizedPath, reader.ObjectBlobByKey(archiveKey));
+ Recs_.push_back(normalizedPath);
+ }
+ } else {
+ const TString normalizedPath = NormalizePath(key);
+ if (lockMemory || ownBlobs) {
+ BlobByKey_.emplace(normalizedPath, fileBlob);
+ } else {
+ PathByKey_.emplace(normalizedPath, RealPath(fullPath));
+ }
+ Recs_.push_back(normalizedPath);
+ }
+ }
+
+ TDirsList dirsList;
+ dirsList.Fill(JoinFsPaths(Path_, subPath));
+ const char* dir;
+ while ((dir = dirsList.Next()) != nullptr) {
+ LoadFilesAndSubdirs(JoinFsPaths(subPath, TString(dir)), lockMemory, ownBlobs);
+ }
+}
diff --git a/library/cpp/archive/directory_models_archive_reader.h b/library/cpp/archive/directory_models_archive_reader.h
new file mode 100644
index 00000000000..d16d6d728d9
--- /dev/null
+++ b/library/cpp/archive/directory_models_archive_reader.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "models_archive_reader.h"
+
+#include <util/folder/path.h>
+#include <util/generic/fwd.h>
+#include <util/generic/hash.h>
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+
+
+class IInputStream;
+
+class TBlob;
+
+class TDirectoryModelsArchiveReader : public IModelsArchiveReader {
+public:
+ TDirectoryModelsArchiveReader(const TString& path, bool lockMemory = false, bool ownBlobs = false);
+ virtual ~TDirectoryModelsArchiveReader() override;
+
+ virtual size_t Count() const noexcept override;
+ virtual TString KeyByIndex(size_t n) const override;
+ virtual bool Has(const TStringBuf key) const override;
+ virtual TAutoPtr<IInputStream> ObjectByKey(const TStringBuf key) const override;
+ virtual TBlob ObjectBlobByKey(const TStringBuf key) const override;
+ virtual TBlob BlobByKey(const TStringBuf key) const override;
+ virtual bool Compressed() const override;
+
+private:
+ TString NormalizePath(TString path) const; // in archive path works unix-like path delimiter and leading slash is neccesery
+ void LoadFilesAndSubdirs(const TString& subPath, bool lockMemory, bool ownBlobs);
+
+private:
+ TString Path_;
+ THashMap<TString, TString> PathByKey_;
+ THashMap<TString, TBlob> BlobByKey_;
+ TVector<TString> Recs_;
+};
diff --git a/library/cpp/archive/directory_models_archive_reader_ut.cpp b/library/cpp/archive/directory_models_archive_reader_ut.cpp
new file mode 100644
index 00000000000..09994de9b4f
--- /dev/null
+++ b/library/cpp/archive/directory_models_archive_reader_ut.cpp
@@ -0,0 +1,55 @@
+#include "directory_models_archive_reader.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/folder/tempdir.h>
+#include <util/string/cast.h>
+#include <util/stream/file.h>
+#include <util/system/tempfile.h>
+#include <util/memory/blob.h>
+
+class TDirectoryModelsArchiveReaderTest: public TTestBase {
+ UNIT_TEST_SUITE(TDirectoryModelsArchiveReaderTest)
+ UNIT_TEST(TestRead);
+ UNIT_TEST_SUITE_END();
+
+private:
+ void TestRead();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TDirectoryModelsArchiveReaderTest);
+
+const TString MAIN_DIR = "./dir";
+const TString SUBDIR = "/subdir";
+const TString SAMPLE_FILE1 = "/sample1";
+const TString SAMPLE_FILE2 = "/sample2";
+const TString TEST_TEXT = "Test Text.";
+
+void TDirectoryModelsArchiveReaderTest::TestRead() {
+ TTempDir mainDir(MAIN_DIR);
+ TTempDir subDir(MAIN_DIR + SUBDIR);
+ TTempFileHandle file1(MAIN_DIR + SAMPLE_FILE1);
+ TTempFileHandle file2(MAIN_DIR + SUBDIR + SAMPLE_FILE2);
+
+ file1.Write(TEST_TEXT.data(), TEST_TEXT.size());
+ file1.FlushData();
+
+ TDirectoryModelsArchiveReader reader(MAIN_DIR, false);
+
+ UNIT_ASSERT_EQUAL(reader.Count(), 2);
+
+ UNIT_ASSERT(reader.Has(SAMPLE_FILE1));
+ UNIT_ASSERT(reader.Has(SUBDIR + SAMPLE_FILE2));
+
+ UNIT_ASSERT_EQUAL(reader.KeyByIndex(0), SAMPLE_FILE1);
+ UNIT_ASSERT_EQUAL(reader.KeyByIndex(1), SUBDIR + SAMPLE_FILE2);
+
+ TBlob blob = reader.BlobByKey(SAMPLE_FILE1);
+ Cout << "'" << TString(blob.AsCharPtr(), blob.Size()) << "' - '" << TEST_TEXT << "'" << Endl;
+ UNIT_ASSERT_VALUES_EQUAL(TString(blob.AsCharPtr(), blob.Size()), TString(TEST_TEXT));
+
+ TAutoPtr<IInputStream> is = reader.ObjectByKey(SAMPLE_FILE1);
+ const TString data = is->ReadAll();
+ Cout << "'" << data << "' - '" << TEST_TEXT << "'" << Endl;
+ UNIT_ASSERT_VALUES_EQUAL(data, TEST_TEXT);
+}
diff --git a/library/cpp/archive/models_archive_reader.h b/library/cpp/archive/models_archive_reader.h
new file mode 100644
index 00000000000..ea237aecb0e
--- /dev/null
+++ b/library/cpp/archive/models_archive_reader.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+#include <util/generic/ptr.h>
+
+class IInputStream;
+
+class TBlob;
+
+class IModelsArchiveReader {
+public:
+ virtual ~IModelsArchiveReader() = default;
+ virtual size_t Count() const = 0;
+ virtual TString KeyByIndex(size_t n) const = 0;
+ virtual bool Has(const TStringBuf key) const = 0;
+ virtual TAutoPtr<IInputStream> ObjectByKey(const TStringBuf key) const = 0;
+ virtual TBlob ObjectBlobByKey(const TStringBuf key) const = 0;
+ virtual TBlob BlobByKey(const TStringBuf key) const = 0;
+ virtual bool Compressed() const = 0;
+};
diff --git a/library/cpp/archive/ut/ya.make b/library/cpp/archive/ut/ya.make
new file mode 100644
index 00000000000..4f324ccfc61
--- /dev/null
+++ b/library/cpp/archive/ut/ya.make
@@ -0,0 +1,16 @@
+UNITTEST()
+
+OWNER(pg)
+
+PEERDIR(
+ ADDINCL library/cpp/archive
+)
+
+SRCDIR(library/cpp/archive)
+
+SRCS(
+ yarchive_ut.cpp
+ directory_models_archive_reader_ut.cpp
+)
+
+END()
diff --git a/library/cpp/archive/ya.make b/library/cpp/archive/ya.make
new file mode 100644
index 00000000000..65d36479efc
--- /dev/null
+++ b/library/cpp/archive/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+OWNER(pg)
+
+SRCS(
+ yarchive.cpp
+ yarchive.h
+ directory_models_archive_reader.cpp
+ directory_models_archive_reader.h
+)
+
+END()
diff --git a/library/cpp/archive/yarchive.cpp b/library/cpp/archive/yarchive.cpp
new file mode 100644
index 00000000000..1becc3e5dac
--- /dev/null
+++ b/library/cpp/archive/yarchive.cpp
@@ -0,0 +1,398 @@
+#include "yarchive.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/hash.h>
+#include <util/generic/utility.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/memory/blob.h>
+#include <util/memory/tempbuf.h>
+#include <util/stream/input.h>
+#include <util/stream/length.h>
+#include <util/stream/mem.h>
+#include <util/stream/output.h>
+#include <util/stream/zlib.h>
+#include <util/system/byteorder.h>
+#include <util/ysaveload.h>
+
+template <class T>
+static inline void ESSave(IOutputStream* out, const T& t_in) {
+ T t = HostToLittle(t_in);
+
+ out->Write((const void*)&t, sizeof(t));
+}
+
+static inline void ESSave(IOutputStream* out, const TString& s) {
+ ESSave(out, (ui32) s.size());
+ out->Write(s.data(), s.size());
+}
+
+template <class T>
+static inline T ESLoad(IInputStream* in) {
+ T t = T();
+
+ if (in->Load(&t, sizeof(t)) != sizeof(t)) {
+ ythrow TSerializeException() << "malformed archive";
+ }
+
+ return LittleToHost(t);
+}
+
+template <>
+inline TString ESLoad<TString>(IInputStream* in) {
+ size_t len = ESLoad<ui32>(in);
+ TString ret;
+ TTempBuf tmp;
+
+ while (len) {
+ const size_t toread = Min(len, tmp.Size());
+ const size_t readed = in->Read(tmp.Data(), toread);
+
+ if (!readed) {
+ ythrow TSerializeException() << "malformed archive";
+ }
+
+ ret.append(tmp.Data(), readed);
+ len -= readed;
+ }
+
+ return ret;
+}
+
+namespace {
+ class TArchiveRecordDescriptor: public TSimpleRefCount<TArchiveRecordDescriptor> {
+ public:
+ inline TArchiveRecordDescriptor(ui64 off, ui64 len, const TString& name)
+ : Off_(off)
+ , Len_(len)
+ , Name_(name)
+ {
+ }
+
+ inline TArchiveRecordDescriptor(IInputStream* in)
+ : Off_(ESLoad<ui64>(in))
+ , Len_(ESLoad<ui64>(in))
+ , Name_(ESLoad<TString>(in))
+ {
+ }
+
+ inline ~TArchiveRecordDescriptor() = default;
+
+ inline void SaveTo(IOutputStream* out) const {
+ ESSave(out, Off_);
+ ESSave(out, Len_);
+ ESSave(out, Name_);
+ }
+
+ inline const TString& Name() const noexcept {
+ return Name_;
+ }
+
+ inline ui64 Length() const noexcept {
+ return Len_;
+ }
+
+ inline ui64 Offset() const noexcept {
+ return Off_;
+ }
+
+ private:
+ ui64 Off_;
+ ui64 Len_;
+ TString Name_;
+ };
+
+ typedef TIntrusivePtr<TArchiveRecordDescriptor> TArchiveRecordDescriptorRef;
+}
+
+class TArchiveWriter::TImpl {
+ using TDict = THashMap<TString, TArchiveRecordDescriptorRef>;
+
+public:
+ inline TImpl(IOutputStream& out, bool compress)
+ : Off_(0)
+ , Out_(&out)
+ , UseCompression(compress)
+ {
+ }
+
+ inline ~TImpl() = default;
+
+ inline void Flush() {
+ Out_->Flush();
+ }
+
+ inline void Finish() {
+ TCountingOutput out(Out_);
+
+ {
+ TZLibCompress compress(&out);
+
+ ESSave(&compress, (ui32)Dict_.size());
+
+ for (const auto& kv : Dict_) {
+ kv.second->SaveTo(&compress);
+ }
+
+ ESSave(&compress, static_cast<ui8>(UseCompression));
+
+ compress.Finish();
+ }
+
+ ESSave(Out_, out.Counter());
+
+ Out_->Flush();
+ }
+
+ inline void Add(const TString& key, IInputStream* src) {
+ Y_ENSURE(!Dict_.contains(key), "key " << key.data() << " already stored");
+
+ TCountingOutput out(Out_);
+ if (UseCompression) {
+ TZLibCompress compress(&out);
+ TransferData(src, &compress);
+ compress.Finish();
+ } else {
+ size_t skip_size = ArchiveWriterDefaultDataAlignment - Off_ % ArchiveWriterDefaultDataAlignment;
+ if (skip_size == ArchiveWriterDefaultDataAlignment) {
+ skip_size = 0;
+ }
+ while(skip_size > 0) {
+ Out_->Write(char(0));
+ Off_ += 1;
+ skip_size -= 1;
+ }
+ TransferData(src, &out);
+ out.Finish();
+ }
+
+ TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(Off_, out.Counter(), key));
+
+ Dict_[key] = descr;
+ Off_ += out.Counter();
+ }
+
+ inline void AddSynonym(const TString& existingKey, const TString& newKey) {
+ Y_ENSURE(Dict_.contains(existingKey), "key " << existingKey.data() << " not stored yet");
+ Y_ENSURE(!Dict_.contains(newKey), "key " << newKey.data() << " already stored");
+
+ TArchiveRecordDescriptorRef existingDescr = Dict_[existingKey];
+ TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(existingDescr->Offset(), existingDescr->Length(), newKey));
+
+ Dict_[newKey] = descr;
+ }
+
+private:
+ ui64 Off_;
+ IOutputStream* Out_;
+ TDict Dict_;
+ const bool UseCompression;
+};
+
+TArchiveWriter::TArchiveWriter(IOutputStream* out, bool compress)
+ : Impl_(new TImpl(*out, compress))
+{
+}
+
+TArchiveWriter::~TArchiveWriter() {
+ try {
+ Finish();
+ } catch (...) {
+ }
+}
+
+void TArchiveWriter::Flush() {
+ if (Impl_.Get()) {
+ Impl_->Flush();
+ }
+}
+
+void TArchiveWriter::Finish() {
+ if (Impl_.Get()) {
+ Impl_->Finish();
+ Impl_.Destroy();
+ }
+}
+
+void TArchiveWriter::Add(const TString& key, IInputStream* src) {
+ Y_ENSURE(Impl_.Get(), "archive already closed");
+
+ Impl_->Add(key, src);
+}
+
+void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) {
+ Y_ENSURE(Impl_.Get(), "archive already closed");
+
+ Impl_->AddSynonym(existingKey, newKey);
+}
+
+namespace {
+ class TArchiveInputStreamBase {
+ public:
+ inline TArchiveInputStreamBase(const TBlob& b)
+ : Blob_(b)
+ , Input_(b.Data(), b.Size())
+ {
+ }
+
+ protected:
+ TBlob Blob_;
+ TMemoryInput Input_;
+ };
+
+ class TArchiveInputStream: public TArchiveInputStreamBase, public TZLibDecompress {
+ public:
+ inline TArchiveInputStream(const TBlob& b)
+ : TArchiveInputStreamBase(b)
+ , TZLibDecompress(&Input_)
+ {
+ }
+
+ ~TArchiveInputStream() override = default;
+ };
+}
+
+class TArchiveReader::TImpl {
+ typedef THashMap<TString, TArchiveRecordDescriptorRef> TDict;
+
+public:
+ inline TImpl(const TBlob& blob)
+ : Blob_(blob)
+ , UseDecompression(true)
+ {
+ ReadDict();
+ }
+
+ inline ~TImpl() = default;
+
+ inline void ReadDict() {
+ Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob");
+
+ const char* end = (const char*)Blob_.End();
+ const char* ptr = end - sizeof(ui64);
+ ui64 dictlen = 0;
+ memcpy(&dictlen, ptr, sizeof(ui64));
+ dictlen = LittleToHost(dictlen);
+
+ Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob");
+
+ const char* beg = ptr - dictlen;
+ TMemoryInput mi(beg, dictlen);
+ TZLibDecompress d(&mi);
+ const ui32 count = ESLoad<ui32>(&d);
+
+ for (size_t i = 0; i < count; ++i) {
+ TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(&d));
+
+ Recs_.push_back(descr);
+ Dict_[descr->Name()] = descr;
+ }
+ Sort(Recs_.begin(), Recs_.end(), [](const auto& lhs, const auto& rhs) -> bool {
+ return lhs->Offset() < rhs->Offset();
+ });
+
+ try {
+ UseDecompression = static_cast<bool>(ESLoad<ui8>(&d));
+ } catch (const TSerializeException&) {
+ // that's ok - just old format
+ UseDecompression = true;
+ }
+ }
+
+ inline size_t Count() const noexcept {
+ return Recs_.size();
+ }
+
+ inline TString KeyByIndex(size_t n) const {
+ if (n < Count()) {
+ return Recs_[n]->Name();
+ }
+
+ ythrow yexception() << "incorrect index";
+ }
+
+ inline bool Has(const TStringBuf key) const {
+ return Dict_.contains(key);
+ }
+
+ inline TAutoPtr<IInputStream> ObjectByKey(const TStringBuf key) const {
+ TBlob subBlob = BlobByKey(key);
+
+ if (UseDecompression) {
+ return new TArchiveInputStream(subBlob);
+ } else {
+ return new TMemoryInput(subBlob.Data(), subBlob.Length());
+ }
+ }
+
+ inline TBlob ObjectBlobByKey(const TStringBuf key) const {
+ TBlob subBlob = BlobByKey(key);
+
+ if (UseDecompression) {
+ TArchiveInputStream st(subBlob);
+ return TBlob::FromStream(st);
+ } else {
+ return subBlob;
+ }
+ }
+
+ inline TBlob BlobByKey(const TStringBuf key) const {
+ const auto it = Dict_.find(key);
+
+ Y_ENSURE(it != Dict_.end(), "key " << key.data() << " not found");
+
+ const size_t off = it->second->Offset();
+ const size_t len = it->second->Length();
+
+ /*
+ * TODO - overflow check
+ */
+
+ return Blob_.SubBlob(off, off + len);
+ }
+
+ inline bool Compressed() const {
+ return UseDecompression;
+ }
+
+private:
+ TBlob Blob_;
+ TVector<TArchiveRecordDescriptorRef> Recs_;
+ TDict Dict_;
+ bool UseDecompression;
+};
+
+TArchiveReader::TArchiveReader(const TBlob& data)
+ : Impl_(new TImpl(data))
+{
+}
+
+TArchiveReader::~TArchiveReader() {}
+
+size_t TArchiveReader::Count() const noexcept {
+ return Impl_->Count();
+}
+
+TString TArchiveReader::KeyByIndex(size_t n) const {
+ return Impl_->KeyByIndex(n);
+}
+
+bool TArchiveReader::Has(const TStringBuf key) const {
+ return Impl_->Has(key);
+}
+
+TAutoPtr<IInputStream> TArchiveReader::ObjectByKey(const TStringBuf key) const {
+ return Impl_->ObjectByKey(key);
+}
+
+TBlob TArchiveReader::ObjectBlobByKey(const TStringBuf key) const {
+ return Impl_->ObjectBlobByKey(key);
+}
+
+TBlob TArchiveReader::BlobByKey(const TStringBuf key) const {
+ return Impl_->BlobByKey(key);
+}
+
+bool TArchiveReader::Compressed() const {
+ return Impl_->Compressed();
+}
diff --git a/library/cpp/archive/yarchive.h b/library/cpp/archive/yarchive.h
new file mode 100644
index 00000000000..8120bcb9402
--- /dev/null
+++ b/library/cpp/archive/yarchive.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "models_archive_reader.h"
+
+#include <util/generic/fwd.h>
+#include <util/generic/ptr.h>
+
+
+class IInputStream;
+class IOutputStream;
+
+class TBlob;
+
+//noncompressed data will be stored with default alignment DEVTOOLS-4384
+static constexpr size_t ArchiveWriterDefaultDataAlignment = 16;
+
+class TArchiveWriter {
+public:
+ explicit TArchiveWriter(IOutputStream* out, bool compress = true);
+ ~TArchiveWriter();
+
+ void Flush();
+ void Finish();
+ void Add(const TString& key, IInputStream* src);
+ void AddSynonym(const TString& existingKey, const TString& newKey);
+
+private:
+ class TImpl;
+ THolder<TImpl> Impl_;
+};
+
+class TArchiveReader : public IModelsArchiveReader {
+public:
+ explicit TArchiveReader(const TBlob& data);
+ ~TArchiveReader() override;
+
+ size_t Count() const noexcept override;
+ TString KeyByIndex(size_t n) const override;
+ bool Has(TStringBuf key) const override;
+ TAutoPtr<IInputStream> ObjectByKey(TStringBuf key) const override;
+ TBlob ObjectBlobByKey(TStringBuf key) const override;
+ TBlob BlobByKey(TStringBuf key) const override;
+ bool Compressed() const override;
+
+private:
+ class TImpl;
+ THolder<TImpl> Impl_;
+};
diff --git a/library/cpp/archive/yarchive_ut.cpp b/library/cpp/archive/yarchive_ut.cpp
new file mode 100644
index 00000000000..602a1cdbbde
--- /dev/null
+++ b/library/cpp/archive/yarchive_ut.cpp
@@ -0,0 +1,84 @@
+#include "yarchive.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/cast.h>
+#include <util/stream/file.h>
+#include <util/system/tempfile.h>
+#include <util/memory/blob.h>
+
+class TArchiveTest: public TTestBase {
+ UNIT_TEST_SUITE(TArchiveTest)
+ UNIT_TEST(TestCreate);
+ UNIT_TEST(TestRead);
+ UNIT_TEST(TestOffsetOrder);
+ UNIT_TEST_SUITE_END();
+
+private:
+ void CreateArchive();
+ void TestCreate();
+ void TestRead();
+ void TestOffsetOrder();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TArchiveTest);
+
+#define ARCHIVE "./test.ar"
+
+void TArchiveTest::CreateArchive() {
+ TFixedBufferFileOutput out(ARCHIVE);
+ TArchiveWriter w(&out);
+
+ for (size_t i = 0; i < 1000; ++i) {
+ const TString path = "/" + ToString(i);
+ const TString data = "data" + ToString(i * 1000) + "dataend";
+ TStringInput si(data);
+
+ w.Add(path, &si);
+ }
+
+ w.Finish();
+ out.Finish();
+}
+
+void TArchiveTest::TestCreate() {
+ CreateArchive();
+ TTempFile tmpFile(ARCHIVE);
+}
+
+void TArchiveTest::TestRead() {
+ CreateArchive();
+ TTempFile tmpFile(ARCHIVE);
+ TBlob blob = TBlob::FromFileSingleThreaded(ARCHIVE);
+ TArchiveReader r(blob);
+
+ UNIT_ASSERT_EQUAL(r.Count(), 1000);
+
+ for (size_t i = 0; i < 1000; ++i) {
+ const TString key = "/" + ToString(i);
+ TAutoPtr<IInputStream> is = r.ObjectByKey(key);
+ const TString data = is->ReadAll();
+
+ UNIT_ASSERT_EQUAL(data, "data" + ToString(i * 1000) + "dataend");
+ }
+}
+
+void TArchiveTest::TestOffsetOrder() {
+ CreateArchive();
+ TTempFile tmpFile(ARCHIVE);
+ TBlob blob1 = TBlob::FromFileSingleThreaded(ARCHIVE);
+ TArchiveReader r(blob1);
+
+ const void* prevOffset = nullptr;
+
+ for (size_t i = 0; i < r.Count(); ++i) {
+ const TString key = r.KeyByIndex(i);
+ TBlob blob2 = r.BlobByKey(key);
+ const void* offset = blob2.Data();
+
+ if (i) {
+ UNIT_ASSERT(prevOffset < offset);
+ }
+ prevOffset = offset;
+ }
+}