diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/streams/bzip2 | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/streams/bzip2')
-rw-r--r-- | library/cpp/streams/bzip2/bzip2.cpp | 204 | ||||
-rw-r--r-- | library/cpp/streams/bzip2/bzip2.h | 53 | ||||
-rw-r--r-- | library/cpp/streams/bzip2/bzip2_ut.cpp | 41 | ||||
-rw-r--r-- | library/cpp/streams/bzip2/ut/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/streams/bzip2/ya.make | 16 |
5 files changed, 326 insertions, 0 deletions
diff --git a/library/cpp/streams/bzip2/bzip2.cpp b/library/cpp/streams/bzip2/bzip2.cpp new file mode 100644 index 0000000000..bccc5c6807 --- /dev/null +++ b/library/cpp/streams/bzip2/bzip2.cpp @@ -0,0 +1,204 @@ +#include "bzip2.h" + +#include <util/memory/addstorage.h> +#include <util/generic/scope.h> + +#include <contrib/libs/libbz2/bzlib.h> + +class TBZipDecompress::TImpl: public TAdditionalStorage<TImpl> { +public: + inline TImpl(IInputStream* input) + : Stream_(input) + { + Zero(BzStream_); + Init(); + } + + inline ~TImpl() { + Clear(); + } + + inline void Init() { + if (BZ2_bzDecompressInit(&BzStream_, 0, 0) != BZ_OK) { + ythrow TBZipDecompressError() << "can not init bzip engine"; + } + } + + inline void Clear() noexcept { + BZ2_bzDecompressEnd(&BzStream_); + } + + inline size_t Read(void* buf, size_t size) { + BzStream_.next_out = (char*)buf; + BzStream_.avail_out = size; + + while (true) { + if (BzStream_.avail_in == 0) { + if (FillInputBuffer() == 0) { + return 0; + } + } + + switch (BZ2_bzDecompress(&BzStream_)) { + case BZ_STREAM_END: { + Clear(); + Init(); + [[fallthrough]]; + } + + case BZ_OK: { + const size_t processed = size - BzStream_.avail_out; + + if (processed) { + return processed; + } + + break; + } + + default: + ythrow TBZipDecompressError() << "bzip error"; + } + } + } + + inline size_t FillInputBuffer() { + BzStream_.next_in = (char*)AdditionalData(); + BzStream_.avail_in = Stream_->Read(BzStream_.next_in, AdditionalDataLength()); + + return BzStream_.avail_in; + } + +private: + IInputStream* Stream_; + bz_stream BzStream_; +}; + +TBZipDecompress::TBZipDecompress(IInputStream* input, size_t bufLen) + : Impl_(new (bufLen) TImpl(input)) +{ +} + +TBZipDecompress::~TBZipDecompress() { +} + +size_t TBZipDecompress::DoRead(void* buf, size_t size) { + return Impl_->Read(buf, size); +} + +class TBZipCompress::TImpl: public TAdditionalStorage<TImpl> { +public: + inline TImpl(IOutputStream* stream, size_t level) + : Stream_(stream) + { + Zero(BzStream_); + + if (BZ2_bzCompressInit(&BzStream_, level, 0, 0) != BZ_OK) { + ythrow TBZipCompressError() << "can not init bzip engine"; + } + + BzStream_.next_out = TmpBuf(); + BzStream_.avail_out = TmpBufLen(); + } + + inline ~TImpl() { + BZ2_bzCompressEnd(&BzStream_); + } + + inline void Write(const void* buf, size_t size) { + BzStream_.next_in = (char*)buf; + BzStream_.avail_in = size; + + Y_DEFER { + BzStream_.next_in = 0; + BzStream_.avail_in = 0; + }; + + while (BzStream_.avail_in) { + const int ret = BZ2_bzCompress(&BzStream_, BZ_RUN); + + switch (ret) { + case BZ_RUN_OK: + continue; + + case BZ_PARAM_ERROR: + case BZ_OUTBUFF_FULL: + Stream_->Write(TmpBuf(), TmpBufLen() - BzStream_.avail_out); + BzStream_.next_out = TmpBuf(); + BzStream_.avail_out = TmpBufLen(); + + break; + + default: + ythrow TBZipCompressError() << "bzip error(" << ret << ", " << BzStream_.avail_out << ")"; + } + } + } + + inline void Flush() { + /* + * TODO ? + */ + } + + inline void Finish() { + int ret = BZ2_bzCompress(&BzStream_, BZ_FINISH); + + while (ret != BZ_STREAM_END) { + Stream_->Write(TmpBuf(), TmpBufLen() - BzStream_.avail_out); + BzStream_.next_out = TmpBuf(); + BzStream_.avail_out = TmpBufLen(); + + ret = BZ2_bzCompress(&BzStream_, BZ_FINISH); + } + + Stream_->Write(TmpBuf(), TmpBufLen() - BzStream_.avail_out); + } + +private: + inline char* TmpBuf() noexcept { + return (char*)AdditionalData(); + } + + inline size_t TmpBufLen() const noexcept { + return AdditionalDataLength(); + } + +private: + IOutputStream* Stream_; + bz_stream BzStream_; +}; + +TBZipCompress::TBZipCompress(IOutputStream* out, size_t compressionLevel, size_t bufLen) + : Impl_(new (bufLen) TImpl(out, compressionLevel)) +{ +} + +TBZipCompress::~TBZipCompress() { + try { + Finish(); + } catch (...) { + } +} + +void TBZipCompress::DoWrite(const void* buf, size_t size) { + if (!Impl_) { + ythrow TBZipCompressError() << "can not write to finished bzip stream"; + } + + Impl_->Write(buf, size); +} + +void TBZipCompress::DoFlush() { + if (Impl_) { + Impl_->Flush(); + } +} + +void TBZipCompress::DoFinish() { + THolder<TImpl> impl(Impl_.Release()); + + if (impl) { + impl->Finish(); + } +} diff --git a/library/cpp/streams/bzip2/bzip2.h b/library/cpp/streams/bzip2/bzip2.h new file mode 100644 index 0000000000..2322277ef6 --- /dev/null +++ b/library/cpp/streams/bzip2/bzip2.h @@ -0,0 +1,53 @@ +#pragma once + +#include <util/stream/input.h> +#include <util/stream/output.h> +#include <util/generic/ptr.h> +#include <util/generic/yexception.h> + +#define BZIP_BUF_LEN (8 * 1024) +#define BZIP_COMPRESSION_LEVEL 6 + +/** + * @addtogroup Streams_Archs + * @{ + */ + +class TBZipException: public yexception { +}; + +class TBZipDecompressError: public TBZipException { +}; + +class TBZipCompressError: public TBZipException { +}; + +class TBZipDecompress: public IInputStream { +public: + TBZipDecompress(IInputStream* input, size_t bufLen = BZIP_BUF_LEN); + ~TBZipDecompress() override; + +private: + size_t DoRead(void* buf, size_t size) override; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; + +class TBZipCompress: public IOutputStream { +public: + TBZipCompress(IOutputStream* out, size_t compressionLevel = BZIP_COMPRESSION_LEVEL, size_t bufLen = BZIP_BUF_LEN); + ~TBZipCompress() override; + +private: + void DoWrite(const void* buf, size_t size) override; + void DoFlush() override; + void DoFinish() override; + +public: + class TImpl; + THolder<TImpl> Impl_; +}; + +/** @} */ diff --git a/library/cpp/streams/bzip2/bzip2_ut.cpp b/library/cpp/streams/bzip2/bzip2_ut.cpp new file mode 100644 index 0000000000..69a98f296c --- /dev/null +++ b/library/cpp/streams/bzip2/bzip2_ut.cpp @@ -0,0 +1,41 @@ +#include "bzip2.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/file.h> +#include <util/system/tempfile.h> + +#define ZDATA "./zdata" + +Y_UNIT_TEST_SUITE(TBZipTest) { + static const TString data = "8s7d5vc6s5vc67sa4c65ascx6asd4xcv76adsfxv76s"; + + Y_UNIT_TEST(TestCompress) { + TUnbufferedFileOutput o(ZDATA); + TBZipCompress c(&o); + + c.Write(data.data(), data.size()); + c.Finish(); + o.Finish(); + } + + Y_UNIT_TEST(TestDecompress) { + TTempFile tmp(ZDATA); + + { + TUnbufferedFileInput i(ZDATA); + TBZipDecompress d(&i); + + UNIT_ASSERT_EQUAL(d.ReadLine(), data); + } + } + + Y_UNIT_TEST(TestCorrupted) { + TMemoryInput i("blablabla", 10); + TBZipDecompress d(&i); + + UNIT_ASSERT_EXCEPTION(d.ReadLine(), TBZipDecompressError); + } +} + +#undef ZDATA diff --git a/library/cpp/streams/bzip2/ut/ya.make b/library/cpp/streams/bzip2/ut/ya.make new file mode 100644 index 0000000000..5ef91498ca --- /dev/null +++ b/library/cpp/streams/bzip2/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/streams/bzip2) + +OWNER( + pg + g:util +) + +SRCS( + bzip2_ut.cpp +) + +END() diff --git a/library/cpp/streams/bzip2/ya.make b/library/cpp/streams/bzip2/ya.make new file mode 100644 index 0000000000..122a35837c --- /dev/null +++ b/library/cpp/streams/bzip2/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +OWNER( + pg + g:util +) + +PEERDIR( + contrib/libs/libbz2 +) + +SRCS( + bzip2.cpp +) + +END() |