diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/streams/lz/lz.cpp | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'library/cpp/streams/lz/lz.cpp')
-rw-r--r-- | library/cpp/streams/lz/lz.cpp | 731 |
1 files changed, 0 insertions, 731 deletions
diff --git a/library/cpp/streams/lz/lz.cpp b/library/cpp/streams/lz/lz.cpp deleted file mode 100644 index b65bb3ed96..0000000000 --- a/library/cpp/streams/lz/lz.cpp +++ /dev/null @@ -1,731 +0,0 @@ -#include "lz.h" - -#include <util/system/yassert.h> -#include <util/system/byteorder.h> -#include <util/memory/addstorage.h> -#include <util/generic/buffer.h> -#include <util/generic/utility.h> -#include <util/generic/singleton.h> -#include <util/generic/yexception.h> -#include <util/stream/mem.h> - -#include <contrib/libs/lz4/lz4.h> -#include <contrib/libs/fastlz/fastlz.h> -#include <contrib/libs/snappy/snappy.h> -#include <contrib/libs/quicklz/quicklz.h> -#include <contrib/libs/minilzo/minilzo.h> - -static inline ui8 HostToLittle(ui8 t) noexcept { - return t; -} - -static inline ui8 LittleToHost(ui8 t) noexcept { - return t; -} - -struct TCommonData { - static const size_t overhead = sizeof(ui16) + sizeof(ui8); -}; - -const size_t SIGNATURE_SIZE = 4; - -template <class TCompressor, class TBase> -class TCompressorBase: public TAdditionalStorage<TCompressorBase<TCompressor, TBase>>, public TCompressor, public TCommonData { -public: - inline TCompressorBase(IOutputStream* slave, ui16 blockSize) - : Slave_(slave) - , BlockSize_(blockSize) - { - /* - * save signature - */ - static_assert(sizeof(TCompressor::signature) - 1 == SIGNATURE_SIZE, "expect sizeof(TCompressor::signature) - 1 == SIGNATURE_SIZE"); - Slave_->Write(TCompressor::signature, sizeof(TCompressor::signature) - 1); - - /* - * save version - */ - this->Save((ui32)1); - - /* - * save block size - */ - this->Save(BlockSize()); - } - - inline ~TCompressorBase() { - } - - inline void Write(const char* buf, size_t len) { - while (len) { - const ui16 toWrite = (ui16)Min<size_t>(len, this->BlockSize()); - - this->WriteBlock(buf, toWrite); - - buf += toWrite; - len -= toWrite; - } - } - - inline void Flush() { - } - - inline void Finish() { - this->Flush(); - this->WriteBlock(nullptr, 0); - } - - template <class T> - static inline void Save(T t, IOutputStream* out) { - t = HostToLittle(t); - - out->Write(&t, sizeof(t)); - } - - template <class T> - inline void Save(T t) { - Save(t, Slave_); - } - -private: - inline void* Block() const noexcept { - return this->AdditionalData(); - } - - inline ui16 BlockSize() const noexcept { - return BlockSize_; - } - - inline void WriteBlock(const void* ptr, ui16 len) { - Y_ASSERT(len <= this->BlockSize()); - - ui8 compressed = false; - - if (len) { - const size_t out = this->Compress((const char*)ptr, len, (char*)Block(), this->AdditionalDataLength()); - // catch compressor buffer overrun (e.g. SEARCH-2043) - //Y_VERIFY(out <= this->Hint(this->BlockSize())); - - if (out < len || TCompressor::SaveIncompressibleChunks()) { - compressed = true; - ptr = Block(); - len = (ui16)out; - } - } - - char tmp[overhead]; - TMemoryOutput header(tmp, sizeof(tmp)); - - this->Save(len, &header); - this->Save(compressed, &header); - - using TPart = IOutputStream::TPart; - if (ptr) { - const TPart parts[] = { - TPart(tmp, sizeof(tmp)), - TPart(ptr, len), - }; - - Slave_->Write(parts, sizeof(parts) / sizeof(*parts)); - } else { - Slave_->Write(tmp, sizeof(tmp)); - } - } - -private: - IOutputStream* Slave_; - const ui16 BlockSize_; -}; - -template <class T> -static inline T GLoad(IInputStream* input) { - T t; - - if (input->Load(&t, sizeof(t)) != sizeof(t)) { - ythrow TDecompressorError() << "stream error"; - } - - return LittleToHost(t); -} - -class TDecompressSignature { -public: - inline TDecompressSignature(IInputStream* input) { - if (input->Load(Buffer_, SIGNATURE_SIZE) != SIGNATURE_SIZE) { - ythrow TDecompressorError() << "can not load stream signature"; - } - } - - template <class TDecompressor> - inline bool Check() const { - static_assert(sizeof(TDecompressor::signature) - 1 == SIGNATURE_SIZE, "expect sizeof(TDecompressor::signature) - 1 == SIGNATURE_SIZE"); - return memcmp(TDecompressor::signature, Buffer_, SIGNATURE_SIZE) == 0; - } - -private: - char Buffer_[SIGNATURE_SIZE]; -}; - -template <class TDecompressor> -static inline IInputStream* ConsumeSignature(IInputStream* input) { - TDecompressSignature sign(input); - if (!sign.Check<TDecompressor>()) { - ythrow TDecompressorError() << "incorrect signature"; - } - return input; -} - -template <class TDecompressor> -class TDecompressorBaseImpl: public TDecompressor, public TCommonData { -public: - static inline ui32 CheckVer(ui32 v) { - if (v != 1) { - ythrow yexception() << TStringBuf("incorrect stream version: ") << v; - } - - return v; - } - - inline TDecompressorBaseImpl(IInputStream* slave) - : Slave_(slave) - , Input_(nullptr, 0) - , Eof_(false) - , Version_(CheckVer(Load<ui32>())) - , BlockSize_(Load<ui16>()) - , OutBufSize_(TDecompressor::Hint(BlockSize_)) - , Tmp_(2 * OutBufSize_) - , In_(Tmp_.Data()) - , Out_(In_ + OutBufSize_) - { - this->InitFromStream(Slave_); - } - - inline ~TDecompressorBaseImpl() { - } - - inline size_t Read(void* buf, size_t len) { - size_t ret = Input_.Read(buf, len); - - if (ret) { - return ret; - } - - if (Eof_) { - return 0; - } - - this->FillNextBlock(); - - ret = Input_.Read(buf, len); - - if (ret) { - return ret; - } - - Eof_ = true; - - return 0; - } - - inline void FillNextBlock() { - char tmp[overhead]; - - if (Slave_->Load(tmp, sizeof(tmp)) != sizeof(tmp)) { - ythrow TDecompressorError() << "can not read block header"; - } - - TMemoryInput header(tmp, sizeof(tmp)); - - const ui16 len = GLoad<ui16>(&header); - if (len > Tmp_.Capacity()) { - ythrow TDecompressorError() << "invalid len inside block header"; - } - const ui8 compressed = GLoad<ui8>(&header); - - if (compressed > 1) { - ythrow TDecompressorError() << "broken header"; - } - - if (Slave_->Load(In_, len) != len) { - ythrow TDecompressorError() << "can not read data"; - } - - if (compressed) { - const size_t ret = this->Decompress(In_, len, Out_, OutBufSize_); - - Input_.Reset(Out_, ret); - } else { - Input_.Reset(In_, len); - } - } - - template <class T> - inline T Load() { - return GLoad<T>(Slave_); - } - -protected: - IInputStream* Slave_; - TMemoryInput Input_; - bool Eof_; - const ui32 Version_; - const ui16 BlockSize_; - const size_t OutBufSize_; - TBuffer Tmp_; - char* In_; - char* Out_; -}; - -template <class TDecompressor, class TBase> -class TDecompressorBase: public TDecompressorBaseImpl<TDecompressor> { -public: - inline TDecompressorBase(IInputStream* slave) - : TDecompressorBaseImpl<TDecompressor>(ConsumeSignature<TDecompressor>(slave)) - { - } - - inline ~TDecompressorBase() { - } -}; - -#define DEF_COMPRESSOR_COMMON(rname, name) \ - rname::~rname() { \ - try { \ - Finish(); \ - } catch (...) { \ - } \ - } \ - \ - void rname::DoWrite(const void* buf, size_t len) { \ - if (!Impl_) { \ - ythrow yexception() << "can not write to finalized stream"; \ - } \ - \ - Impl_->Write((const char*)buf, len); \ - } \ - \ - void rname::DoFlush() { \ - if (!Impl_) { \ - ythrow yexception() << "can not flush finalized stream"; \ - } \ - \ - Impl_->Flush(); \ - } \ - \ - void rname::DoFinish() { \ - THolder<TImpl> impl(Impl_.Release()); \ - \ - if (impl) { \ - impl->Finish(); \ - } \ - } - -#define DEF_COMPRESSOR(rname, name) \ - class rname::TImpl: public TCompressorBase<name, TImpl> { \ - public: \ - inline TImpl(IOutputStream* out, ui16 blockSize) \ - : TCompressorBase<name, TImpl>(out, blockSize) { \ - } \ - }; \ - \ - rname::rname(IOutputStream* slave, ui16 blockSize) \ - : Impl_(new (TImpl::Hint(blockSize)) TImpl(slave, blockSize)) { \ - } \ - \ - DEF_COMPRESSOR_COMMON(rname, name) - -#define DEF_DECOMPRESSOR(rname, name) \ - class rname::TImpl: public TDecompressorBase<name, TImpl> { \ - public: \ - inline TImpl(IInputStream* in) \ - : TDecompressorBase<name, TImpl>(in) { \ - } \ - }; \ - \ - rname::rname(IInputStream* slave) \ - : Impl_(new TImpl(slave)) { \ - } \ - \ - rname::~rname() { \ - } \ - \ - size_t rname::DoRead(void* buf, size_t len) { \ - return Impl_->Read(buf, len); \ - } - -/* - * MiniLzo - */ -class TMiniLzo { - class TInit { - public: - inline TInit() { - if (lzo_init() != LZO_E_OK) { - ythrow yexception() << "can not init lzo engine"; - } - } - }; - -public: - static const char signature[]; - - inline TMiniLzo() { - Singleton<TInit>(); - } - - inline ~TMiniLzo() { - } - - static inline size_t Hint(size_t len) noexcept { - // see SEARCH-2043 and, e.g. examples at - // http://stackoverflow.com/questions/4235019/how-to-get-lzo-to-work-with-a-file-stream - return len + (len / 16) + 64 + 3; - } - - static inline bool SaveIncompressibleChunks() noexcept { - return false; - } -}; - -const char TMiniLzo::signature[] = "YLZO"; - -template <size_t N> -class TFixedArray { -public: - inline TFixedArray() noexcept { - memset(WorkMem_, 0, sizeof(WorkMem_)); - } - -protected: - char WorkMem_[N]; -}; - -class TMiniLzoCompressor: public TMiniLzo, public TFixedArray<LZO1X_MEM_COMPRESS + 1> { -public: - inline size_t Compress(const char* data, size_t len, char* ptr, size_t /*dstMaxSize*/) { - lzo_uint out = 0; - lzo1x_1_compress((const lzo_bytep)data, len, (lzo_bytep)ptr, &out, WorkMem_); - - return out; - } -}; - -class TMiniLzoDecompressor: public TMiniLzo, public TFixedArray<LZO1X_MEM_DECOMPRESS + 1> { -public: - inline size_t Decompress(const char* data, size_t len, char* ptr, size_t /*max*/) { - lzo_uint ret = 0; - - lzo1x_decompress((const lzo_bytep)data, len, (lzo_bytep)ptr, &ret, WorkMem_); - - return ret; - } - - inline void InitFromStream(IInputStream*) const noexcept { - } -}; - -DEF_COMPRESSOR(TLzoCompress, TMiniLzoCompressor) -DEF_DECOMPRESSOR(TLzoDecompress, TMiniLzoDecompressor) - -/* - * FastLZ - */ -class TFastLZ { -public: - static const char signature[]; - - static inline size_t Hint(size_t len) noexcept { - return Max<size_t>((size_t)(len * 1.06), 100); - } - - inline size_t Compress(const char* data, size_t len, char* ptr, size_t /*dstMaxSize*/) { - return fastlz_compress(data, len, ptr); - } - - inline size_t Decompress(const char* data, size_t len, char* ptr, size_t max) { - return fastlz_decompress(data, len, ptr, max); - } - - inline void InitFromStream(IInputStream*) const noexcept { - } - - static inline bool SaveIncompressibleChunks() noexcept { - return false; - } -}; - -const char TFastLZ::signature[] = "YLZF"; - -DEF_COMPRESSOR(TLzfCompress, TFastLZ) -DEF_DECOMPRESSOR(TLzfDecompress, TFastLZ) - -/* - * LZ4 - */ -class TLZ4 { -public: - static const char signature[]; - - static inline size_t Hint(size_t len) noexcept { - return Max<size_t>((size_t)(len * 1.06), 100); - } - - inline size_t Compress(const char* data, size_t len, char* ptr, size_t dstMaxSize) { - return LZ4_compress_default(data, ptr, len, dstMaxSize); - } - - inline size_t Decompress(const char* data, size_t len, char* ptr, size_t max) { - int res = LZ4_decompress_safe(data, ptr, len, max); - if (res < 0) - ythrow TDecompressorError(); - return res; - } - - inline void InitFromStream(IInputStream*) const noexcept { - } - - static inline bool SaveIncompressibleChunks() noexcept { - return false; - } -}; - -const char TLZ4::signature[] = "LZ.4"; - -DEF_COMPRESSOR(TLz4Compress, TLZ4) -DEF_DECOMPRESSOR(TLz4Decompress, TLZ4) - -/* - * Snappy - */ -class TSnappy { -public: - static const char signature[]; - - static inline size_t Hint(size_t len) noexcept { - return Max<size_t>(snappy::MaxCompressedLength(len), 100); - } - - inline size_t Compress(const char* data, size_t len, char* ptr, size_t /*dstMaxSize*/) { - size_t reslen = 0; - snappy::RawCompress(data, len, ptr, &reslen); - return reslen; - } - - inline size_t Decompress(const char* data, size_t len, char* ptr, size_t) { - size_t srclen = 0; - if (!snappy::GetUncompressedLength(data, len, &srclen) || !snappy::RawUncompress(data, len, ptr)) - ythrow TDecompressorError(); - return srclen; - } - - inline void InitFromStream(IInputStream*) const noexcept { - } - - static inline bool SaveIncompressibleChunks() noexcept { - return false; - } -}; - -const char TSnappy::signature[] = "Snap"; - -DEF_COMPRESSOR(TSnappyCompress, TSnappy) -DEF_DECOMPRESSOR(TSnappyDecompress, TSnappy) - -/* - * QuickLZ - */ -class TQuickLZBase { -public: - static const char signature[]; - - static inline size_t Hint(size_t len) noexcept { - return len + 500; - } - - inline TQuickLZBase() - : Table_(nullptr) - { - } - - inline void Init(unsigned ver, unsigned lev, unsigned mod, unsigned type) { - Table_ = LzqTable(ver, lev, mod); - - if (!Table_) { - ythrow yexception() << "unsupported lzq stream(" << ver << ", " << lev << ", " << mod << ")"; - } - - const size_t size = Table_->Setting(3) + Table_->Setting(type); - - Mem_.Reset(::operator new(size)); - memset(Mem_.Get(), 0, size); - } - - inline bool SaveIncompressibleChunks() const noexcept { - // we must save incompressible chunks "as is" - // after compressor run in streaming mode - return Table_->Setting(3); - } - -protected: - const TQuickLZMethods* Table_; - THolder<void> Mem_; -}; - -const char TQuickLZBase::signature[] = "YLZQ"; - -class TQuickLZCompress: public TQuickLZBase { -public: - inline size_t Compress(const char* data, size_t len, char* ptr, size_t /*dstMaxSize*/) { - return Table_->Compress(data, ptr, len, (char*)Mem_.Get()); - } -}; - -class TQuickLZDecompress: public TQuickLZBase { -public: - inline size_t Decompress(const char* data, size_t /*len*/, char* ptr, size_t /*max*/) { - return Table_->Decompress(data, ptr, (char*)Mem_.Get()); - } - - inline void InitFromStream(IInputStream* in) { - const ui8 ver = ::GLoad<ui8>(in); - const ui8 lev = ::GLoad<ui8>(in); - const ui8 mod = ::GLoad<ui8>(in); - - Init(ver, lev, mod, 2); - } -}; - -class TLzqCompress::TImpl: public TCompressorBase<TQuickLZCompress, TImpl> { -public: - inline TImpl(IOutputStream* out, ui16 blockSize, EVersion ver, unsigned level, EMode mode) - : TCompressorBase<TQuickLZCompress, TImpl>(out, blockSize) - { - memset(AdditionalData(), 0, AdditionalDataLength()); - - Init(ver, level, mode, 1); - - Save((ui8)ver); - Save((ui8)level); - Save((ui8)mode); - } -}; - -TLzqCompress::TLzqCompress(IOutputStream* slave, ui16 blockSize, EVersion ver, unsigned level, EMode mode) - : Impl_(new (TImpl::Hint(blockSize)) TImpl(slave, blockSize, ver, level, mode)) -{ -} - -DEF_COMPRESSOR_COMMON(TLzqCompress, TQuickLZCompress) -DEF_DECOMPRESSOR(TLzqDecompress, TQuickLZDecompress) - -namespace { - template <class T> - struct TInputHolder { - static inline T Set(T t) noexcept { - return t; - } - }; - - template <class T> - struct TInputHolder<TAutoPtr<T>> { - inline T* Set(TAutoPtr<T> v) noexcept { - V_ = v; - - return V_.Get(); - } - - TAutoPtr<T> V_; - }; - - // Decompressing input streams without signature verification - template <class TInput, class TDecompressor> - class TLzDecompressInput: public TInputHolder<TInput>, public IInputStream { - public: - inline TLzDecompressInput(TInput in) - : Impl_(this->Set(in)) - { - } - - private: - size_t DoRead(void* buf, size_t len) override { - return Impl_.Read(buf, len); - } - - private: - TDecompressorBaseImpl<TDecompressor> Impl_; - }; -} - -template <class T> -static TAutoPtr<IInputStream> TryOpenLzDecompressorX(const TDecompressSignature& s, T input) { - if (s.Check<TLZ4>()) - return new TLzDecompressInput<T, TLZ4>(input); - - if (s.Check<TSnappy>()) - return new TLzDecompressInput<T, TSnappy>(input); - - if (s.Check<TMiniLzo>()) - return new TLzDecompressInput<T, TMiniLzoDecompressor>(input); - - if (s.Check<TFastLZ>()) - return new TLzDecompressInput<T, TFastLZ>(input); - - if (s.Check<TQuickLZDecompress>()) - return new TLzDecompressInput<T, TQuickLZDecompress>(input); - - return nullptr; -} - -template <class T> -static inline TAutoPtr<IInputStream> TryOpenLzDecompressorImpl(const TStringBuf& signature, T input) { - if (signature.size() == SIGNATURE_SIZE) { - TMemoryInput mem(signature.data(), signature.size()); - TDecompressSignature s(&mem); - - return TryOpenLzDecompressorX(s, input); - } - - return nullptr; -} - -template <class T> -static inline TAutoPtr<IInputStream> TryOpenLzDecompressorImpl(T input) { - TDecompressSignature s(&*input); - - return TryOpenLzDecompressorX(s, input); -} - -template <class T> -static inline TAutoPtr<IInputStream> OpenLzDecompressorImpl(T input) { - TAutoPtr<IInputStream> ret = TryOpenLzDecompressorImpl(input); - - if (!ret) { - ythrow TDecompressorError() << "Unknown compression format"; - } - - return ret; -} - -TAutoPtr<IInputStream> OpenLzDecompressor(IInputStream* input) { - return OpenLzDecompressorImpl(input); -} - -TAutoPtr<IInputStream> TryOpenLzDecompressor(IInputStream* input) { - return TryOpenLzDecompressorImpl(input); -} - -TAutoPtr<IInputStream> TryOpenLzDecompressor(const TStringBuf& signature, IInputStream* input) { - return TryOpenLzDecompressorImpl(signature, input); -} - -TAutoPtr<IInputStream> OpenOwnedLzDecompressor(TAutoPtr<IInputStream> input) { - return OpenLzDecompressorImpl(input); -} - -TAutoPtr<IInputStream> TryOpenOwnedLzDecompressor(TAutoPtr<IInputStream> input) { - return TryOpenLzDecompressorImpl(input); -} - -TAutoPtr<IInputStream> TryOpenOwnedLzDecompressor(const TStringBuf& signature, TAutoPtr<IInputStream> input) { - return TryOpenLzDecompressorImpl(signature, input); -} |