aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/comptable_codec.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/comptable_codec.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/comptable_codec.cpp')
-rw-r--r--library/cpp/codecs/comptable_codec.cpp108
1 files changed, 108 insertions, 0 deletions
diff --git a/library/cpp/codecs/comptable_codec.cpp b/library/cpp/codecs/comptable_codec.cpp
new file mode 100644
index 0000000000..476b8ada80
--- /dev/null
+++ b/library/cpp/codecs/comptable_codec.cpp
@@ -0,0 +1,108 @@
+#include "comptable_codec.h"
+
+#include <library/cpp/comptable/comptable.h>
+#include <util/string/cast.h>
+
+namespace NCodecs {
+ // Private implementation of TCompTableCodec.
+ //
+ // Owns the compression table together with the compressor/decompressor
+ // objects built from it. Compressor and Decompressor are only created by
+ // Init(), which is called from DoLearn() and Load(); Encode() and Decode()
+ // dereference them unconditionally for non-empty input.
+ class TCompTableCodec::TImpl: public TAtomicRefCount<TImpl> {
+ public:
+     // Stores the requested quality; no table or (de)compressor exists yet.
+     explicit TImpl(EQuality q)
+         : Quality(q)
+     {
+     }
+
+     // (Re)creates the compressor and the decompressor from the current Table.
+     // Quality is passed to both as a bool.
+     void Init() {
+         Compressor.Reset(new NCompTable::TChunkCompressor{static_cast<bool>(Quality), Table});
+         Decompressor.Reset(new NCompTable::TChunkDecompressor{static_cast<bool>(Quality), Table});
+     }
+
+     // Compresses `in` into `out`, replacing the previous contents of `out`.
+     // Empty input yields empty output. Always returns 0.
+     ui8 Encode(TStringBuf in, TBuffer& out) const {
+         out.Clear();
+         if (!in) {
+             return 0;
+         }
+
+         TVector<char> result;
+         Compressor->Compress(in, &result);
+         // data() is well-defined even when the vector is empty, unlike &result[0].
+         out.Assign(result.data(), result.size());
+         return 0;
+     }
+
+     // Decompresses `in` into `out`, replacing the previous contents of `out`.
+     // Empty input yields empty output.
+     void Decode(TStringBuf in, TBuffer& out) const {
+         out.Clear();
+         if (!in) {
+             return;
+         }
+
+         TVector<char> result;
+         Decompressor->Decompress(in, &result);
+         // data() is well-defined even when the vector is empty, unlike &result[0].
+         out.Assign(result.data(), result.size());
+     }
+
+     // Feeds every non-empty region produced by `in` to a data sampler,
+     // builds Table from the collected statistics and re-initializes the
+     // compressor/decompressor.
+     void DoLearn(ISequenceReader& in) {
+         NCompTable::TDataSampler sampler;
+         TStringBuf region;
+         while (in.NextRegion(region)) {
+             if (!region) {
+                 continue;
+             }
+
+             sampler.AddStat(region);
+         }
+
+         sampler.BuildTable(Table);
+         Init();
+     }
+
+     // Serializes Table to `out`.
+     void Save(IOutputStream* out) const {
+         ::Save(out, Table);
+     }
+
+     // Reads Table from `in` and rebuilds the compressor/decompressor from it.
+     void Load(IInputStream* in) {
+         ::Load(in, Table);
+         Init();
+     }
+
+     NCompTable::TCompressorTable Table;
+     THolder<NCompTable::TChunkCompressor> Compressor;
+     THolder<NCompTable::TChunkDecompressor> Decompressor;
+     const EQuality Quality;
+     // Larger of four sampler sizes and (1 << 22) * 5; the codec constructor
+     // publishes it as the recommended sample size.
+     static const ui32 SampleSize = Max(NCompTable::TDataSampler::Size * 4, (1 << 22) * 5);
+ };
+
+ // Creates the implementation object for the requested quality and fills in
+ // the codec traits.
+ TCompTableCodec::TCompTableCodec(EQuality q)
+     : Impl(new TImpl(q))
+ {
+     // The codec has to be trained (or loaded) before use.
+     MyTraits.NeedsTraining = true;
+     MyTraits.RecommendedSampleSize = TImpl::SampleSize;
+
+     // Size multipliers reported for encoding and decoding.
+     MyTraits.SizeOnEncodeMultiplier = 2;
+     MyTraits.SizeOnDecodeMultiplier = 10;
+ }
+
+ // Defaulted in this translation unit, after the definition of TImpl above.
+ // NOTE(review): presumably required so that Impl is destroyed where TImpl is
+ // a complete type - confirm against the header.
+ TCompTableCodec::~TCompTableCodec() = default;
+
+ // Returns the codec name: the HQ name when Quality is non-zero, the LQ name
+ // otherwise.
+ TString TCompTableCodec::GetName() const {
+     if (Impl->Quality) {
+         return ToString(MyNameHQ());
+     }
+     return ToString(MyNameLQ());
+ }
+
+ // Forwards to TImpl::Encode and returns its result.
+ ui8 TCompTableCodec::Encode(TStringBuf in, TBuffer& out) const {
+     const TImpl& impl = *Impl;
+     return impl.Encode(in, out);
+ }
+
+ // Forwards to TImpl::Decode.
+ void TCompTableCodec::Decode(TStringBuf in, TBuffer& out) const {
+     const TImpl& impl = *Impl;
+     impl.Decode(in, out);
+ }
+
+ // Forwards to TImpl::DoLearn, which builds the table from the sequence `in`.
+ void TCompTableCodec::DoLearn(ISequenceReader& in) {
+     TImpl& impl = *Impl;
+     impl.DoLearn(in);
+ }
+
+ // Forwards to TImpl::Save, which writes the table to `out`.
+ void TCompTableCodec::Save(IOutputStream* out) const {
+     const TImpl& impl = *Impl;
+     impl.Save(out);
+ }
+
+ // Forwards to TImpl::Load, which reads the table from `in` and re-initializes
+ // the compressor and decompressor.
+ void TCompTableCodec::Load(IInputStream* in) {
+     TImpl& impl = *Impl;
+     impl.Load(in);
+ }
+
+}