diff options
author | Ruslan Kovalev <ruslan.a.kovalev@gmail.com> | 2022-02-10 16:46:44 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:44 +0300 |
commit | 59e19371de37995fcb36beb16cd6ec030af960bc (patch) | |
tree | fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/codecs.h | |
parent | 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff) | |
download | ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz |
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/codecs.h')
-rw-r--r-- | library/cpp/codecs/codecs.h | 138 |
1 files changed, 69 insertions, 69 deletions
diff --git a/library/cpp/codecs/codecs.h b/library/cpp/codecs/codecs.h index cc5e72b285..aa7c24b4c6 100644 --- a/library/cpp/codecs/codecs.h +++ b/library/cpp/codecs/codecs.h @@ -1,63 +1,63 @@ -#pragma once - -#include "sample.h" - -#include <util/generic/bt_exception.h> -#include <util/generic/hash.h> -#include <util/generic/ptr.h> -#include <util/generic/singleton.h> - -#include <util/stream/input.h> -#include <util/stream/output.h> - +#pragma once + +#include "sample.h" + +#include <util/generic/bt_exception.h> +#include <util/generic/hash.h> +#include <util/generic/ptr.h> +#include <util/generic/singleton.h> + +#include <util/stream/input.h> +#include <util/stream/output.h> + #include <util/string/cast.h> -#include <util/string/vector.h> -#include <util/system/tls.h> -#include <util/ysaveload.h> - -namespace NCodecs { +#include <util/string/vector.h> +#include <util/system/tls.h> +#include <util/ysaveload.h> + +namespace NCodecs { class TCodecException: public TWithBackTrace<yexception> {}; - + class ICodec; - + using TCodecPtr = TIntrusivePtr<ICodec>; using TCodecConstPtr = TIntrusiveConstPtr<ICodec>; - + struct TCodecTraits { ui32 RecommendedSampleSize = 0; ui16 SizeOfInputElement = 1; ui8 SizeOnEncodeMultiplier = 1; ui8 SizeOnEncodeAddition = 0; ui8 SizeOnDecodeMultiplier = 1; - + bool NeedsTraining = false; bool PreservesPrefixGrouping = false; bool Irreversible = false; bool PaddingBit = 0; bool AssumesStructuredInput = false; - + size_t ApproximateSizeOnEncode(size_t sz) const { return sz * SizeOnEncodeMultiplier + SizeOnEncodeAddition; } - + size_t ApproximateSizeOnDecode(size_t sz) const { return sz * SizeOnDecodeMultiplier; } }; - + class ICodec: public TAtomicRefCount<ICodec> { protected: bool Trained = false; TCodecTraits MyTraits; - + public: TCodecTraits Traits() const { return MyTraits; } - + // the name of the codec (or its variant) to be used in the codec registry virtual TString GetName() const = 0; - + virtual ui8 /*free bits in last byte*/ Encode(TStringBuf, TBuffer&) const = 0; virtual ui8 Encode(const TBuffer& input, TBuffer& output) const { return Encode(TStringBuf(input.Data(), input.Data() + input.Size()), output); @@ -66,16 +66,16 @@ namespace NCodecs { virtual void Decode(const TBuffer& input, TBuffer& output) const { Decode(TStringBuf(input.Data(), input.Data() + input.Size()), output); } - + virtual ~ICodec() = default; - + virtual bool AlreadyTrained() const { return !Traits().NeedsTraining || Trained; } virtual void SetTrained(bool t) { Trained = t; } - + bool TryToLearn(ISequenceReader& r) { Trained = DoTryToLearn(r); return Trained; @@ -84,32 +84,32 @@ namespace NCodecs { void Learn(ISequenceReader& r) { LearnX(r, 1); } - + template <class TIter> void Learn(TIter beg, TIter end) { Learn(beg, end, IterToStringBuf<TIter>); } - + template <class TIter, class TGetter> void Learn(TIter beg, TIter end, TGetter getter) { auto sample = GetSample(beg, end, Traits().RecommendedSampleSize, getter); TSimpleSequenceReader<TBuffer> reader{sample}; Learn(reader); } - + static TCodecPtr GetInstance(TStringBuf name); - + static TVector<TString> GetCodecsList(); - + static TString GetNameSafe(TCodecPtr p); - + static void Store(IOutputStream* out, TCodecPtr p); static TCodecPtr Restore(IInputStream* in); static TCodecPtr RestoreFromString(TStringBuf); - + protected: virtual void DoLearn(ISequenceReader&) = 0; - + virtual bool DoTryToLearn(ISequenceReader& r) { DoLearn(r); return true; @@ -119,20 +119,20 @@ namespace NCodecs { virtual void DoLearnX(ISequenceReader& r, double /*sampleSizeMultiplier*/) { DoLearn(r); } - + virtual void Save(IOutputStream*) const { } virtual void Load(IInputStream*) { } friend class TPipelineCodec; - + public: // so the pipeline codec will know to adjust the sample for the subcodecs void LearnX(ISequenceReader& r, double sampleSizeMult) { DoLearnX(r, sampleSizeMult); Trained = true; } - + template <class TIter> void LearnX(TIter beg, TIter end, double sampleSizeMult) { auto sample = GetSample(beg, end, Traits().RecommendedSampleSize * sampleSizeMult); @@ -140,54 +140,54 @@ namespace NCodecs { LearnX(reader, sampleSizeMult); } }; - + class TBasicTrivialCodec: public ICodec { public: ui8 Encode(TStringBuf in, TBuffer& out) const override { out.Assign(in.data(), in.size()); return 0; } - + void Decode(TStringBuf in, TBuffer& out) const override { Encode(in, out); } - + protected: void DoLearn(ISequenceReader&) override { } }; - + class TTrivialCodec: public TBasicTrivialCodec { public: TTrivialCodec() { MyTraits.PreservesPrefixGrouping = true; } - + static TStringBuf MyName() { return "trivial"; } - + TString GetName() const override { return ToString(MyName()); } }; - + class TTrivialTrainableCodec: public TBasicTrivialCodec { public: TTrivialTrainableCodec() { MyTraits.PreservesPrefixGrouping = true; MyTraits.NeedsTraining = true; } - + static TStringBuf MyName() { return "trivial-trainable"; } - + TString GetName() const override { return ToString(MyName()); } }; - + class TNullCodec: public ICodec { public: TNullCodec() { @@ -195,31 +195,31 @@ namespace NCodecs { MyTraits.SizeOnDecodeMultiplier = 0; MyTraits.SizeOnEncodeMultiplier = 0; } - + TString GetName() const override { return "null"; } - + ui8 Encode(TStringBuf, TBuffer& out) const override { out.Clear(); return 0; } - + void Decode(TStringBuf, TBuffer& out) const override { out.Clear(); } - + protected: void DoLearn(ISequenceReader&) override { } }; - + class TPipelineCodec: public ICodec { typedef TVector<TCodecPtr> TPipeline; - + TPipeline Pipeline; TString MyName; - + public: explicit TPipelineCodec(TCodecPtr c0 = nullptr, TCodecPtr c1 = nullptr, TCodecPtr c2 = nullptr, TCodecPtr c3 = nullptr) { MyTraits.PreservesPrefixGrouping = true; @@ -228,32 +228,32 @@ namespace NCodecs { AddCodec(c2); AddCodec(c3); } - + TString GetName() const override { return MyName; } - + ui8 Encode(TStringBuf in, TBuffer& out) const override; void Decode(TStringBuf in, TBuffer& out) const override; - + public: /* - * Add codecs in the following order: - * uncompressed -> codec0 | codec1 | ... | codecN -> compressed - */ + * Add codecs in the following order: + * uncompressed -> codec0 | codec1 | ... | codecN -> compressed + */ TPipelineCodec& AddCodec(TCodecPtr codec); - + bool AlreadyTrained() const override; void SetTrained(bool t) override; - + protected: void DoLearn(ISequenceReader& in) override { DoLearnX(in, 1); } - + void DoLearnX(ISequenceReader& in, double sampleSizeMult) override; void Save(IOutputStream* out) const override; void Load(IInputStream* in) override; }; - -} + +} |