aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/codecs
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/codecs')
-rw-r--r--library/cpp/codecs/static/README1
-rw-r--r--library/cpp/codecs/static/builder.cpp39
-rw-r--r--library/cpp/codecs/static/builder.h29
-rw-r--r--library/cpp/codecs/static/common.h32
-rw-r--r--library/cpp/codecs/static/example/example.cpp43
-rw-r--r--library/cpp/codecs/static/example/example.h17
-rw-r--r--library/cpp/codecs/static/example/huffman.1467494385.codec_infobin385 -> 0 bytes
-rw-r--r--library/cpp/codecs/static/example/solar-8k-a.huffman.1467494385.codec_infobin3425 -> 0 bytes
-rw-r--r--library/cpp/codecs/static/static.cpp98
-rw-r--r--library/cpp/codecs/static/static.h34
-rw-r--r--library/cpp/codecs/static/static_codec_info.proto17
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.cpp74
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.h75
-rw-r--r--library/cpp/codecs/static/tools/static_codec_checker/README4
-rw-r--r--library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp73
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/README4
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp82
-rw-r--r--library/cpp/codecs/static/tools/tests/canondata/result.json6
-rw-r--r--library/cpp/codecs/static/tools/tests/static_codec_tools.py18
-rw-r--r--library/cpp/codecs/static/ut/builder_ut.cpp57
-rw-r--r--library/cpp/codecs/static/ut/static_ut.cpp27
21 files changed, 0 insertions, 730 deletions
diff --git a/library/cpp/codecs/static/README b/library/cpp/codecs/static/README
deleted file mode 100644
index 1b07f02433d..00000000000
--- a/library/cpp/codecs/static/README
+++ /dev/null
@@ -1 +0,0 @@
-Support of static libraries in library/cpp/codecs. See library/cpp/codecs/static/example.
diff --git a/library/cpp/codecs/static/builder.cpp b/library/cpp/codecs/static/builder.cpp
deleted file mode 100644
index 93e34a3edbb..00000000000
--- a/library/cpp/codecs/static/builder.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "builder.h"
-#include "common.h"
-
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-
-#include <library/cpp/codecs/codecs.h>
-
-#include <util/generic/yexception.h>
-#include <util/string/subst.h>
-
-namespace NCodecs {
- TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo& info) {
- TStaticCodecInfo result;
- TCodecPtr codec = ICodec::GetInstance(info.CodecName);
- Y_ENSURE_EX(codec, TCodecException() << "empty codec is not allowed");
-
- codec->LearnX(trainingData.begin(), trainingData.end(), info.SampleSizeMultiplier);
- {
- TStringOutput sout{*result.MutableStoredCodec()};
- ICodec::Store(&sout, codec);
- }
-
- auto& debugInfo = *result.MutableDebugInfo();
- debugInfo.SetStoredCodecHash(DataSignature(result.GetStoredCodec()));
- debugInfo.SetCodecName(info.CodecName);
- debugInfo.SetSampleSizeMultiplier(info.SampleSizeMultiplier);
- debugInfo.SetTimestamp(info.Timestamp);
- debugInfo.SetRevisionInfo(info.RevisionInfo);
- debugInfo.SetTrainingSetComment(info.TrainingSetComment);
- debugInfo.SetTrainingSetResId(info.TrainingSetResId);
- return result;
- }
-
- TString GetStandardFileName(const TStaticCodecInfo& info) {
- TString cName = info.GetDebugInfo().GetCodecName();
- SubstGlobal(cName, ':', '.');
- return TStringBuilder() << cName << "." << info.GetDebugInfo().GetTimestamp() << ".codec_info";
- }
-}
diff --git a/library/cpp/codecs/static/builder.h b/library/cpp/codecs/static/builder.h
deleted file mode 100644
index d7533be4d58..00000000000
--- a/library/cpp/codecs/static/builder.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-#include "static.h"
-
-#include <library/cpp/svnversion/svnversion.h>
-
-#include <util/datetime/base.h>
-#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/string/builder.h>
-
-namespace NCodecs {
- struct TCodecBuildInfo {
- // optimal values from SEARCH-1655
- TString CodecName = "solar-8k-a:zstd08d-1";
- float SampleSizeMultiplier = 1;
-
- // debug info:
- time_t Timestamp = TInstant::Now().TimeT();
- TString RevisionInfo = (TStringBuilder() << "r" << ToString(GetProgramSvnRevision()));
- TString TrainingSetComment; // a human comment on the training data
- TString TrainingSetResId; // sandbox resid of the training set
- };
-
- TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo&);
-
- TString GetStandardFileName(const TStaticCodecInfo&);
-
-}
diff --git a/library/cpp/codecs/static/common.h b/library/cpp/codecs/static/common.h
deleted file mode 100644
index 211de2a27d2..00000000000
--- a/library/cpp/codecs/static/common.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include <util/string/hex.h>
-#include <util/digest/city.h>
-#include <util/system/byteorder.h>
-
-namespace NCodecs {
- template <class T>
- ui64 DataSignature(const T& t) {
- static_assert(!std::is_scalar<T>::value, "no scalars");
- return CityHash64(t.data(), t.size());
- }
-
- template <class T>
- TString HexWriteScalar(T t) {
- static_assert(std::is_scalar<T>::value, "scalars only");
- t = LittleToBig(t);
- TString res = HexEncode(&t, sizeof(t));
- res.to_lower();
- return res;
- }
-
- template <class T>
- T HexReadScalar(TStringBuf s) {
- static_assert(std::is_scalar<T>::value, "scalars only");
- T t = 0;
- HexDecode(s.data(), Min(s.size(), sizeof(T)), &t);
- t = BigToLittle(t);
- return t;
- }
-
-}
diff --git a/library/cpp/codecs/static/example/example.cpp b/library/cpp/codecs/static/example/example.cpp
deleted file mode 100644
index 5b750b717e1..00000000000
--- a/library/cpp/codecs/static/example/example.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "example.h"
-
-#include <library/cpp/codecs/static/static.h>
-
-#include <util/generic/yexception.h>
-
-extern "C" {
-extern const ui8 codec_info_huff_20160707[];
-extern const ui32 codec_info_huff_20160707Size;
-extern const ui8 codec_info_sa_huff_20160707[];
-extern const ui32 codec_info_sa_huff_20160707Size;
-};
-
-namespace NStaticCodecExample {
- static const NCodecs::TCodecConstPtr CODECS[] = {
- nullptr,
- NCodecs::RestoreCodecFromArchive(codec_info_huff_20160707, codec_info_huff_20160707Size),
- NCodecs::RestoreCodecFromArchive(codec_info_sa_huff_20160707, codec_info_sa_huff_20160707Size),
- };
-
- static_assert(Y_ARRAY_SIZE(CODECS) == DV_COUNT, "bad array size");
-
- void Encode(TBuffer& out, TStringBuf in, EDictVersion dv) {
- Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
- out.Clear();
- if (!in) {
- return;
- }
- CODECS[dv]->Encode(in, out);
- out.Append((char)dv);
- }
-
- void Decode(TBuffer& out, TStringBuf in) {
- out.Clear();
- if (!in) {
- return;
- }
- EDictVersion dv = (EDictVersion)in.back();
- Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
- in.Chop(1);
- CODECS[dv]->Decode(in, out);
- }
-}
diff --git a/library/cpp/codecs/static/example/example.h b/library/cpp/codecs/static/example/example.h
deleted file mode 100644
index f9b3a7324b7..00000000000
--- a/library/cpp/codecs/static/example/example.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <util/generic/strbuf.h>
-#include <util/generic/buffer.h>
-
-namespace NStaticCodecExample {
- enum EDictVersion : ui8 {
- DV_NULL = 0,
- DV_HUFF_20160707,
- DV_SA_HUFF_20160707,
- DV_COUNT
- };
-
- void Encode(TBuffer&, TStringBuf, EDictVersion dv = DV_SA_HUFF_20160707);
-
- void Decode(TBuffer&, TStringBuf);
-}
diff --git a/library/cpp/codecs/static/example/huffman.1467494385.codec_info b/library/cpp/codecs/static/example/huffman.1467494385.codec_info
deleted file mode 100644
index 5fc18270a6b..00000000000
--- a/library/cpp/codecs/static/example/huffman.1467494385.codec_info
+++ /dev/null
Binary files differ
diff --git a/library/cpp/codecs/static/example/solar-8k-a.huffman.1467494385.codec_info b/library/cpp/codecs/static/example/solar-8k-a.huffman.1467494385.codec_info
deleted file mode 100644
index d36d8e24ec9..00000000000
--- a/library/cpp/codecs/static/example/solar-8k-a.huffman.1467494385.codec_info
+++ /dev/null
Binary files differ
diff --git a/library/cpp/codecs/static/static.cpp b/library/cpp/codecs/static/static.cpp
deleted file mode 100644
index 44a07dd73a2..00000000000
--- a/library/cpp/codecs/static/static.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "static.h"
-#include "common.h"
-
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <library/cpp/archive/yarchive.h>
-
-#include <util/draft/datetime.h>
-
-#include <util/string/builder.h>
-#include <util/stream/buffer.h>
-#include <util/stream/mem.h>
-#include <util/string/hex.h>
-#include <util/ysaveload.h>
-
-namespace NCodecs {
- static constexpr TStringBuf STATIC_CODEC_INFO_MAGIC = "CodecInf";
-
- static TStringBuf GetStaticCodecInfoMagic() {
- return STATIC_CODEC_INFO_MAGIC;
- }
-
- void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo& info) {
- TBufferOutput bout;
- info.SerializeToArcadiaStream(&bout);
- ui64 hash = DataSignature(bout.Buffer());
- out.Write(GetStaticCodecInfoMagic());
- ::Save(&out, hash);
- ::Save(&out, bout.Buffer());
- }
-
- TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in) {
- {
- TBuffer magic;
- magic.Resize(GetStaticCodecInfoMagic().size());
- Y_ENSURE_EX(in.Read(magic.Data(), GetStaticCodecInfoMagic().size()) == GetStaticCodecInfoMagic().size(),
- TCodecException() << "bad codec info");
- Y_ENSURE_EX(TStringBuf(magic.data(), magic.size()) == GetStaticCodecInfoMagic(),
- TCodecException() << "bad codec info");
- }
-
- ui64 hash;
- ::Load(&in, hash);
- TBuffer info;
- ::Load(&in, info);
- Y_ENSURE_EX(hash == DataSignature(info), TCodecException() << "bad codec info");
-
- TStaticCodecInfo result;
- Y_ENSURE_EX(result.ParseFromArray(info.data(), info.size()), TCodecException() << "bad codec info");
-
- return result;
- }
-
- TString SaveCodecInfoToString(const TStaticCodecInfo& info) {
- TStringStream s;
- SaveCodecInfoToStream(s, info);
- return s.Str();
- }
-
- TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data) {
- TMemoryInput m{data.data(), data.size()};
- return LoadCodecInfoFromStream(m);
- }
-
- TString FormatCodecInfo(const TStaticCodecInfo& ci) {
- TStringBuilder s;
- s << "codec name: " << ci.GetDebugInfo().GetCodecName() << Endl;
- s << "codec hash: " << HexWriteScalar(ci.GetDebugInfo().GetStoredCodecHash()) << Endl;
- s << "dict size: " << ci.GetStoredCodec().Size() << Endl;
- s << "sample mult: " << ci.GetDebugInfo().GetSampleSizeMultiplier() << Endl;
- s << "orig.compress: " << ci.GetDebugInfo().GetCompression() * 100 << " %" << Endl;
- s << "timestamp: " << ci.GetDebugInfo().GetTimestamp() << " ("
- << NDatetime::TSimpleTM::NewLocal(ci.GetDebugInfo().GetTimestamp()).ToString()
- << ")" << Endl;
- s << "revision: " << ci.GetDebugInfo().GetRevisionInfo() << Endl;
- s << "training set comment: " << ci.GetDebugInfo().GetTrainingSetComment() << Endl;
- s << "training set resId: " << ci.GetDebugInfo().GetTrainingSetResId() << Endl;
- return s;
- }
-
- TString LoadStringFromArchive(const ui8* begin, size_t size) {
- TArchiveReader ar(TBlob::NoCopy(begin, size));
- Y_VERIFY(ar.Count() == 1, "invalid number of entries");
- auto blob = ar.ObjectBlobByKey(ar.KeyByIndex(0));
- return TString{blob.AsCharPtr(), blob.Size()};
- }
-
- TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo& info) {
- return NCodecs::ICodec::RestoreFromString(info.GetStoredCodec());
- }
-
- TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size) {
- const auto& data = LoadStringFromArchive(begin, size);
- const auto& info = LoadCodecInfoFromString(data);
- const auto& codec = RestoreCodecFromCodecInfo(info);
- Y_ENSURE_EX(codec, TCodecException() << "null codec");
- return codec;
- }
-}
diff --git a/library/cpp/codecs/static/static.h b/library/cpp/codecs/static/static.h
deleted file mode 100644
index c1eaed2a742..00000000000
--- a/library/cpp/codecs/static/static.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-
-#include <library/cpp/codecs/codecs.h>
-
-#include <util/generic/strbuf.h>
-#include <util/generic/string.h>
-#include <util/stream/output.h>
-
-namespace NCodecs {
- class TStaticCodecInfo;
-
- // load
-
- TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo&);
-
- TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data);
-
- TString LoadStringFromArchive(const ui8* begin, size_t size);
-
- TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size);
-
- // save
-
- TString SaveCodecInfoToString(const TStaticCodecInfo&);
-
- void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo&);
-
- // misc
-
- TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in);
-
- TString FormatCodecInfo(const TStaticCodecInfo&);
-
-}
diff --git a/library/cpp/codecs/static/static_codec_info.proto b/library/cpp/codecs/static/static_codec_info.proto
deleted file mode 100644
index 362abb4dadf..00000000000
--- a/library/cpp/codecs/static/static_codec_info.proto
+++ /dev/null
@@ -1,17 +0,0 @@
-package NCodecs;
-
-message TStaticCodecInfo {
- message TDebugInfo {
- optional string CodecName = 1; // the exact codec variant name
- optional uint64 Timestamp = 2; // when the codec was built
- optional string RevisionInfo = 3; // the arcadia revision info
- optional float SampleSizeMultiplier = 4; // how the default sample size was modified to improve compression
- optional float Compression = 5; // the compression on the training set ((raw_size - coded_size) / raw_size)
- optional string TrainingSetComment = 6; // a human readable description of the training set
- optional string TrainingSetResId = 7; // the training set sandbox resource id
- optional uint64 StoredCodecHash = 8; // cityhash64(data)
- }
-
- optional bytes StoredCodec = 1; // the data of the codec
- optional TDebugInfo DebugInfo = 2; // misc debug info which could be useful in finding whereabouts later
-}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp
deleted file mode 100644
index fe776912805..00000000000
--- a/library/cpp/codecs/static/tools/common/ct_common.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "ct_common.h"
-
-#include <library/cpp/codecs/codecs.h>
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <library/cpp/string_utils/base64/base64.h>
-
-#include <util/stream/output.h>
-#include <util/string/builder.h>
-#include <util/system/hp_timer.h>
-
-namespace NCodecs {
- TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
- TStringBuilder s;
- s << "raw size/item: " << RawSizePerRecord() << Endl;
- s << "enc.size/item: " << EncSizePerRecord() << Endl;
- if (checkMode) {
- s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
- }
- s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
- s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
- s << "dict size: " << info.GetStoredCodec().Size() << Endl;
- s << "compression: " << AsPercent(Compression()) << " %" << Endl;
- if (checkMode) {
- s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
- }
- return s;
- }
-
- TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
- TComprStats stats;
-
- TBuffer encodeBuffer;
- TBuffer decodeBuffer;
- for (const auto& data : input) {
- encodeBuffer.Clear();
- decodeBuffer.Clear();
-
- stats.Records += 1;
- stats.RawSize += data.size();
-
- THPTimer timer;
- c.Encode(data, encodeBuffer);
- stats.EncSize += encodeBuffer.size();
- stats.EncSeconds += timer.PassedReset();
-
- c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer);
- stats.DecSeconds += timer.PassedReset();
- Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records);
- }
-
- return stats;
- }
-
- void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
- TStringBuf bin(blob.AsCharPtr(), blob.Size());
- TStringBuf line;
- TString buffer;
- while (bin.ReadLine(line)) {
- if (DSF_BASE64_LF == fmt) {
- Base64Decode(line, buffer);
- line = buffer;
- }
- if (!line) {
- continue;
- }
- result.emplace_back(line.data(), line.size());
- }
- }
-
- TBlob GetInputBlob(const TString& dataFile) {
- return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
- }
-
-}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h
deleted file mode 100644
index 9d3dcbda934..00000000000
--- a/library/cpp/codecs/static/tools/common/ct_common.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-
-#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/memory/blob.h>
-#include <cmath>
-
-namespace NCodecs {
- class TStaticCodecInfo;
- class ICodec;
-
- struct TComprStats {
- double EncSeconds = 0;
- double DecSeconds = 0;
- size_t Records = 0;
- size_t RawSize = 0;
- size_t EncSize = 0;
-
- static double Round(double n, size_t decPlaces = 2) {
- double p = pow(10, decPlaces);
- return round(n * p) / p;
- }
-
- static double AsPercent(double n) {
- return Round(n * 100);
- }
-
- static double AsMicroSecond(double s) {
- return s * 1000000;
- }
-
- double PerRecord(double n) const {
- return Round((double)(Records ? n / Records : 0));
- }
-
- double Compression() const {
- return ((double)RawSize - (double)EncSize) / RawSize;
- }
-
- double EncTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(EncSeconds));
- }
-
- double DecTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(DecSeconds));
- }
-
- double RawSizePerRecord() const {
- return PerRecord(RawSize);
- }
-
- double EncSizePerRecord() const {
- return PerRecord(EncSize);
- }
-
- double OldEncSizePerRecord(double compr) const {
- return PerRecord((1 - compr) * RawSize);
- }
-
- TString Format(const TStaticCodecInfo&, bool checkMode) const;
- };
-
- TComprStats TestCodec(const ICodec&, const TVector<TString>& data);
-
- enum EDataStreamFormat {
- DSF_NONE,
- DSF_PLAIN_LF /* "plain" */,
- DSF_BASE64_LF /* "base64" */,
- };
-
- void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&);
-
- TBlob GetInputBlob(const TString& dataFile);
-
-}
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/README b/library/cpp/codecs/static/tools/static_codec_checker/README
deleted file mode 100644
index 723a68300b0..00000000000
--- a/library/cpp/codecs/static/tools/static_codec_checker/README
+++ /dev/null
@@ -1,4 +0,0 @@
-This is a viewer for generated codec and utility for verification of the compression quality on a new data.
-
-Usage:
-static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
deleted file mode 100644
index 9c8d568d823..00000000000
--- a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <library/cpp/codecs/static/tools/common/ct_common.h>
-#include <library/cpp/codecs/static/static.h>
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <library/cpp/codecs/codecs.h>
-#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/digest/city.h>
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/stream/buffer.h>
-#include <util/stream/format.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecPtr codecPtr;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
- TString codecFile;
- bool testCompression = false;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator.");
- opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt");
- NCodecs::TStaticCodecInfo codec;
-
- opts.AddLongOption('c', "codec-info").RequiredArgument("codec_info").Handler1T<TString>([&codecFile, &codec, &codecPtr](TString name) {
- codecFile = name;
- codec.CopyFrom(NCodecs::LoadCodecInfoFromString(TUnbufferedFileInput(name).ReadAll()));
- codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
- })
- .Required()
- .Help(".codec_info file with serialized static data for codec");
-
- opts.AddLongOption('t', "test").NoArgument().StoreValue(&testCompression, true).Optional().Help("test current performance");
-
- opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Optional().Help("test set input file format");
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << codecFile << Endl;
- Cout << NCodecs::FormatCodecInfo(codec) << Endl;
-
- if (testCompression) {
- if (NCodecs::DSF_NONE == fmt) {
- Cerr << "Specify format (-f|--format) for testing set input" << Endl;
- exit(1);
- }
-
- Cout << "Reading testing set data ... " << Flush;
-
- TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
-
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- Cout << stats.Format(codec, true) << Endl;
- }
-}
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README
deleted file mode 100644
index e6bb52b9591..00000000000
--- a/library/cpp/codecs/static/tools/static_codec_generator/README
+++ /dev/null
@@ -1,4 +0,0 @@
-This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource.
-
-Usage:
-static_codec_generator -t -m 'the training data description' -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
deleted file mode 100644
index 45fdb5c5fe8..00000000000
--- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#include <library/cpp/codecs/static/tools/common/ct_common.h>
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <library/cpp/codecs/static/builder.h>
-#include <library/cpp/codecs/codecs.h>
-
-#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecBuildInfo info;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
- opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
-
- opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set");
-
- opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set");
-
- opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName);
-
- opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size");
-
- opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format");
-
- opts.AddLongOption("list-codecs").NoArgument().Handler0([]() {
- Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl;
- exit(0);
- })
- .Optional()
- .Help("list available codecs");
-
- opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info
-
- opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << "Reading training set data ... " << Flush;
- TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
- auto codec = NCodecs::BuildStaticCodec(allData, info);
- Cout << "Done" << Endl;
-
- TString codecName = NCodecs::GetStandardFileName(codec);
- NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- codec.MutableDebugInfo()->SetCompression(stats.Compression());
-
- Cout << stats.Format(codec, false) << Endl;
-
- Cout << "Saving as " << codecName << " ... " << Flush;
- {
- TUnbufferedFileOutput fout{codecName};
- NCodecs::SaveCodecInfoToStream(fout, codec);
- fout.Finish();
- }
- Cout << "Done" << Endl << Endl;
-}
diff --git a/library/cpp/codecs/static/tools/tests/canondata/result.json b/library/cpp/codecs/static/tools/tests/canondata/result.json
deleted file mode 100644
index 7a637c6763a..00000000000
--- a/library/cpp/codecs/static/tools/tests/canondata/result.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "static_codec_tools.test_static_codec_tools": {
- "checksum": "960e3c8c57fb846ab53ccbd07e287233",
- "uri": "sbr://144512644/static_codec_tools.test_static_codec_tools/solar-8k-a.huffman.1467494385.codec_info"
- }
-} \ No newline at end of file
diff --git a/library/cpp/codecs/static/tools/tests/static_codec_tools.py b/library/cpp/codecs/static/tools/tests/static_codec_tools.py
deleted file mode 100644
index db4140e3703..00000000000
--- a/library/cpp/codecs/static/tools/tests/static_codec_tools.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-
-import yatest.common as tt
-import os.path as op
-
-def test_static_codec_tools():
- tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator")]
- + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1",
- "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"],
- timeout=60)
- assert(op.exists("solar-8k-a.huffman.1467494385.codec_info"))
- tt.canonical_execute(tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker"),
- args=["-c", "solar-8k-a.huffman.1467494385.codec_info"],
- timeout=60)
- tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker")]
- + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"],
- timeout=60)
- return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info")
diff --git a/library/cpp/codecs/static/ut/builder_ut.cpp b/library/cpp/codecs/static/ut/builder_ut.cpp
deleted file mode 100644
index 778ab47d931..00000000000
--- a/library/cpp/codecs/static/ut/builder_ut.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-#include <library/cpp/codecs/static/builder.h>
-#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <util/string/vector.h>
-
-class TStaticCodecInfoBuilderTest: public NUnitTest::TTestBase {
- UNIT_TEST_SUITE(TStaticCodecInfoBuilderTest)
- UNIT_TEST(TestBuild)
- UNIT_TEST_SUITE_END();
-
-private:
- TVector<TString> PrepareData() {
- TVector<TString> data;
- for (ui32 i = 'a'; i <= 'z'; ++i) {
- data.push_back(TString(1, (char)i));
- }
- return data;
- }
-
- void TestBuild() {
- TVector<TString> data;
- NCodecs::TCodecBuildInfo info;
- info.CodecName = "huffman";
- info.SampleSizeMultiplier = 2;
- info.Timestamp = 1467494385;
- info.RevisionInfo = "r2385905";
- info.TrainingSetComment = "some dummy data";
- info.TrainingSetResId = "sbr://1234";
- auto res = NCodecs::BuildStaticCodec(PrepareData(), info);
- UNIT_ASSERT_VALUES_EQUAL(res.ShortUtf8DebugString(),
- "StoredCodec: \"\\007\\000huffman@S\\000a"
- "\\006b\\005c\\005d\\005e\\005f\\005g\\005h\\005i\\005j\\005k\\005l\\005m\\005n\\005o"
- "\\005p\\005q\\005r\\005s\\005t\\005u\\004v\\004w\\004x\\004y\\004z\\004\\307?\\310>"
- "\\311=\\312<\\313;\\314:\\3159\\3168\\3177\\3206\\3215\\3224\\3233\\3242\\3251\\3260\\327/\\330."
- "\\331-\\332,\\333+\\334*\\335)\\336(\\337\\'\\340&\\341%\\342$\\343#\\344\\\"\\345!\\346 \\347"
- "\\037\\350\\036\\351\\035\\352\\034\\353\\033\\354\\032\\355\\031\\356\\030\\357\\027\\360"
- "\\026\\361\\025\\362\\024\\363\\023\\364\\022\\365\\021\\366\\020\\367\\017\\370\\016\\371"
- "\\r\\372\\014\\373\\013\\374\\n\\375\\t\\376\\010\\377\\007\" "
- "DebugInfo { "
- "CodecName: \"huffman\" "
- "Timestamp: 1467494385 "
- "RevisionInfo: \"r2385905\" "
- "SampleSizeMultiplier: 2 "
- "TrainingSetComment: \"some dummy data\" "
- "TrainingSetResId: \"sbr://1234\" "
- "StoredCodecHash: 2509195835471488613 "
- "}");
-
- UNIT_ASSERT_VALUES_EQUAL(NCodecs::GetStandardFileName(res), "huffman.1467494385.codec_info");
- UNIT_ASSERT_VALUES_EQUAL(res.GetDebugInfo().GetStoredCodecHash(), 2509195835471488613ULL);
-
- auto res1 = NCodecs::LoadCodecInfoFromString(NCodecs::SaveCodecInfoToString(res));
- UNIT_ASSERT_VALUES_EQUAL(res1.ShortUtf8DebugString(), res.ShortUtf8DebugString());
- }
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TStaticCodecInfoBuilderTest);
diff --git a/library/cpp/codecs/static/ut/static_ut.cpp b/library/cpp/codecs/static/ut/static_ut.cpp
deleted file mode 100644
index 57e1e628874..00000000000
--- a/library/cpp/codecs/static/ut/static_ut.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-#include <library/cpp/codecs/static/example/example.h>
-
-class TStaticCodecUsageTest: public NUnitTest::TTestBase {
- UNIT_TEST_SUITE(TStaticCodecUsageTest)
- UNIT_TEST(TestUsage)
- UNIT_TEST_SUITE_END();
-
-private:
- void DoTestUsage(NStaticCodecExample::EDictVersion dv, size_t expectedSize) {
- const TStringBuf letov = "Всё идёт по плану";
-
- TBuffer outEnc, outDec;
- NStaticCodecExample::Encode(outEnc, letov, dv);
- NStaticCodecExample::Decode(outDec, TStringBuf{outEnc.data(), outEnc.size()});
-
- UNIT_ASSERT_VALUES_EQUAL(outEnc.Size(), expectedSize);
- UNIT_ASSERT_EQUAL(TStringBuf(outDec.data(), outDec.size()), letov);
- }
-
- void TestUsage() {
- DoTestUsage(NStaticCodecExample::DV_HUFF_20160707, 18u);
- DoTestUsage(NStaticCodecExample::DV_SA_HUFF_20160707, 22u);
- }
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TStaticCodecUsageTest)