about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/codecs/static
diff options
context:
space:
mode:
author Ruslan Kovalev <ruslan.a.kovalev@gmail.com> 2022-02-10 16:46:44 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:44 +0300
commit 59e19371de37995fcb36beb16cd6ec030af960bc (patch)
tree fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/static
parent 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff)
download ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/static')
-rw-r--r-- library/cpp/codecs/static/builder.cpp 68
-rw-r--r-- library/cpp/codecs/static/builder.h 42
-rw-r--r-- library/cpp/codecs/static/common.h 56
-rw-r--r-- library/cpp/codecs/static/example/example.cpp 76
-rw-r--r-- library/cpp/codecs/static/example/example.h 24
-rw-r--r-- library/cpp/codecs/static/example/ya.make 36
-rw-r--r-- library/cpp/codecs/static/static.cpp 160
-rw-r--r-- library/cpp/codecs/static/static.h 52
-rw-r--r-- library/cpp/codecs/static/static_codec_info.proto 34
-rw-r--r-- library/cpp/codecs/static/tools/common/ct_common.cpp 118
-rw-r--r-- library/cpp/codecs/static/tools/common/ct_common.h 140
-rw-r--r-- library/cpp/codecs/static/tools/common/ya.make 26
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_checker/README 4
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp 114
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_checker/ya.make 22
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_generator/README 4
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp 122
-rw-r--r-- library/cpp/codecs/static/tools/static_codec_generator/ya.make 22
-rw-r--r-- library/cpp/codecs/static/tools/tests/static_codec_tools.py 30
-rw-r--r-- library/cpp/codecs/static/tools/tests/ya.make 28
-rw-r--r-- library/cpp/codecs/static/tools/ya.make 10
-rw-r--r-- library/cpp/codecs/static/ut/builder_ut.cpp 94
-rw-r--r-- library/cpp/codecs/static/ut/static_ut.cpp 38
-rw-r--r-- library/cpp/codecs/static/ut/ya.make 24
-rw-r--r-- library/cpp/codecs/static/ya.make 30
25 files changed, 687 insertions, 687 deletions
diff --git a/library/cpp/codecs/static/builder.cpp b/library/cpp/codecs/static/builder.cpp
index 93e34a3edb..083f0fc6f6 100644
--- a/library/cpp/codecs/static/builder.cpp
+++ b/library/cpp/codecs/static/builder.cpp
@@ -1,39 +1,39 @@
-#include "builder.h"
-#include "common.h"
-
+#include "builder.h"
+#include "common.h"
+
#include <library/cpp/codecs/static/static_codec_info.pb.h>
-
+
#include <library/cpp/codecs/codecs.h>
-
-#include <util/generic/yexception.h>
-#include <util/string/subst.h>
-
-namespace NCodecs {
+
+#include <util/generic/yexception.h>
+#include <util/string/subst.h>
+
+namespace NCodecs {
TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo& info) {
- TStaticCodecInfo result;
- TCodecPtr codec = ICodec::GetInstance(info.CodecName);
- Y_ENSURE_EX(codec, TCodecException() << "empty codec is not allowed");
-
- codec->LearnX(trainingData.begin(), trainingData.end(), info.SampleSizeMultiplier);
- {
- TStringOutput sout{*result.MutableStoredCodec()};
- ICodec::Store(&sout, codec);
- }
-
- auto& debugInfo = *result.MutableDebugInfo();
- debugInfo.SetStoredCodecHash(DataSignature(result.GetStoredCodec()));
- debugInfo.SetCodecName(info.CodecName);
- debugInfo.SetSampleSizeMultiplier(info.SampleSizeMultiplier);
- debugInfo.SetTimestamp(info.Timestamp);
- debugInfo.SetRevisionInfo(info.RevisionInfo);
- debugInfo.SetTrainingSetComment(info.TrainingSetComment);
- debugInfo.SetTrainingSetResId(info.TrainingSetResId);
- return result;
- }
-
+ TStaticCodecInfo result;
+ TCodecPtr codec = ICodec::GetInstance(info.CodecName);
+ Y_ENSURE_EX(codec, TCodecException() << "empty codec is not allowed");
+
+ codec->LearnX(trainingData.begin(), trainingData.end(), info.SampleSizeMultiplier);
+ {
+ TStringOutput sout{*result.MutableStoredCodec()};
+ ICodec::Store(&sout, codec);
+ }
+
+ auto& debugInfo = *result.MutableDebugInfo();
+ debugInfo.SetStoredCodecHash(DataSignature(result.GetStoredCodec()));
+ debugInfo.SetCodecName(info.CodecName);
+ debugInfo.SetSampleSizeMultiplier(info.SampleSizeMultiplier);
+ debugInfo.SetTimestamp(info.Timestamp);
+ debugInfo.SetRevisionInfo(info.RevisionInfo);
+ debugInfo.SetTrainingSetComment(info.TrainingSetComment);
+ debugInfo.SetTrainingSetResId(info.TrainingSetResId);
+ return result;
+ }
+
TString GetStandardFileName(const TStaticCodecInfo& info) {
TString cName = info.GetDebugInfo().GetCodecName();
- SubstGlobal(cName, ':', '.');
- return TStringBuilder() << cName << "." << info.GetDebugInfo().GetTimestamp() << ".codec_info";
- }
-}
+ SubstGlobal(cName, ':', '.');
+ return TStringBuilder() << cName << "." << info.GetDebugInfo().GetTimestamp() << ".codec_info";
+ }
+}
diff --git a/library/cpp/codecs/static/builder.h b/library/cpp/codecs/static/builder.h
index d7533be4d5..234ad42dff 100644
--- a/library/cpp/codecs/static/builder.h
+++ b/library/cpp/codecs/static/builder.h
@@ -1,29 +1,29 @@
-#pragma once
-
-#include "static.h"
-
+#pragma once
+
+#include "static.h"
+
#include <library/cpp/svnversion/svnversion.h>
-
-#include <util/datetime/base.h>
+
+#include <util/datetime/base.h>
#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/string/builder.h>
-
-namespace NCodecs {
- struct TCodecBuildInfo {
- // optimal values from SEARCH-1655
+#include <util/generic/vector.h>
+#include <util/string/builder.h>
+
+namespace NCodecs {
+ struct TCodecBuildInfo {
+ // optimal values from SEARCH-1655
TString CodecName = "solar-8k-a:zstd08d-1";
- float SampleSizeMultiplier = 1;
-
- // debug info:
- time_t Timestamp = TInstant::Now().TimeT();
+ float SampleSizeMultiplier = 1;
+
+ // debug info:
+ time_t Timestamp = TInstant::Now().TimeT();
TString RevisionInfo = (TStringBuilder() << "r" << ToString(GetProgramSvnRevision()));
TString TrainingSetComment; // a human comment on the training data
TString TrainingSetResId; // sandbox resid of the training set
- };
-
+ };
+
TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo&);
-
+
TString GetStandardFileName(const TStaticCodecInfo&);
-
-}
+
+}
diff --git a/library/cpp/codecs/static/common.h b/library/cpp/codecs/static/common.h
index 211de2a27d..84b0349d82 100644
--- a/library/cpp/codecs/static/common.h
+++ b/library/cpp/codecs/static/common.h
@@ -1,32 +1,32 @@
-#pragma once
-
-#include <util/string/hex.h>
-#include <util/digest/city.h>
-#include <util/system/byteorder.h>
-
-namespace NCodecs {
- template <class T>
- ui64 DataSignature(const T& t) {
- static_assert(!std::is_scalar<T>::value, "no scalars");
+#pragma once
+
+#include <util/string/hex.h>
+#include <util/digest/city.h>
+#include <util/system/byteorder.h>
+
+namespace NCodecs {
+ template <class T>
+ ui64 DataSignature(const T& t) {
+ static_assert(!std::is_scalar<T>::value, "no scalars");
return CityHash64(t.data(), t.size());
- }
-
- template <class T>
+ }
+
+ template <class T>
TString HexWriteScalar(T t) {
- static_assert(std::is_scalar<T>::value, "scalars only");
- t = LittleToBig(t);
+ static_assert(std::is_scalar<T>::value, "scalars only");
+ t = LittleToBig(t);
TString res = HexEncode(&t, sizeof(t));
- res.to_lower();
- return res;
- }
-
- template <class T>
- T HexReadScalar(TStringBuf s) {
- static_assert(std::is_scalar<T>::value, "scalars only");
- T t = 0;
+ res.to_lower();
+ return res;
+ }
+
+ template <class T>
+ T HexReadScalar(TStringBuf s) {
+ static_assert(std::is_scalar<T>::value, "scalars only");
+ T t = 0;
HexDecode(s.data(), Min(s.size(), sizeof(T)), &t);
- t = BigToLittle(t);
- return t;
- }
-
-}
+ t = BigToLittle(t);
+ return t;
+ }
+
+}
diff --git a/library/cpp/codecs/static/example/example.cpp b/library/cpp/codecs/static/example/example.cpp
index 5b750b717e..0c50a1a5be 100644
--- a/library/cpp/codecs/static/example/example.cpp
+++ b/library/cpp/codecs/static/example/example.cpp
@@ -1,43 +1,43 @@
-#include "example.h"
-
+#include "example.h"
+
#include <library/cpp/codecs/static/static.h>
-
-#include <util/generic/yexception.h>
-
-extern "C" {
+
+#include <util/generic/yexception.h>
+
+extern "C" {
extern const ui8 codec_info_huff_20160707[];
extern const ui32 codec_info_huff_20160707Size;
extern const ui8 codec_info_sa_huff_20160707[];
extern const ui32 codec_info_sa_huff_20160707Size;
-};
-
-namespace NStaticCodecExample {
- static const NCodecs::TCodecConstPtr CODECS[] = {
- nullptr,
- NCodecs::RestoreCodecFromArchive(codec_info_huff_20160707, codec_info_huff_20160707Size),
- NCodecs::RestoreCodecFromArchive(codec_info_sa_huff_20160707, codec_info_sa_huff_20160707Size),
- };
-
- static_assert(Y_ARRAY_SIZE(CODECS) == DV_COUNT, "bad array size");
-
- void Encode(TBuffer& out, TStringBuf in, EDictVersion dv) {
- Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
- out.Clear();
- if (!in) {
- return;
- }
- CODECS[dv]->Encode(in, out);
- out.Append((char)dv);
- }
-
- void Decode(TBuffer& out, TStringBuf in) {
- out.Clear();
- if (!in) {
- return;
- }
- EDictVersion dv = (EDictVersion)in.back();
- Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
- in.Chop(1);
- CODECS[dv]->Decode(in, out);
- }
-}
+};
+
+namespace NStaticCodecExample {
+ static const NCodecs::TCodecConstPtr CODECS[] = {
+ nullptr,
+ NCodecs::RestoreCodecFromArchive(codec_info_huff_20160707, codec_info_huff_20160707Size),
+ NCodecs::RestoreCodecFromArchive(codec_info_sa_huff_20160707, codec_info_sa_huff_20160707Size),
+ };
+
+ static_assert(Y_ARRAY_SIZE(CODECS) == DV_COUNT, "bad array size");
+
+ void Encode(TBuffer& out, TStringBuf in, EDictVersion dv) {
+ Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
+ out.Clear();
+ if (!in) {
+ return;
+ }
+ CODECS[dv]->Encode(in, out);
+ out.Append((char)dv);
+ }
+
+ void Decode(TBuffer& out, TStringBuf in) {
+ out.Clear();
+ if (!in) {
+ return;
+ }
+ EDictVersion dv = (EDictVersion)in.back();
+ Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv);
+ in.Chop(1);
+ CODECS[dv]->Decode(in, out);
+ }
+}
diff --git a/library/cpp/codecs/static/example/example.h b/library/cpp/codecs/static/example/example.h
index f9b3a7324b..070ca90f02 100644
--- a/library/cpp/codecs/static/example/example.h
+++ b/library/cpp/codecs/static/example/example.h
@@ -1,17 +1,17 @@
-#pragma once
-
-#include <util/generic/strbuf.h>
-#include <util/generic/buffer.h>
-
-namespace NStaticCodecExample {
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/buffer.h>
+
+namespace NStaticCodecExample {
enum EDictVersion : ui8 {
DV_NULL = 0,
DV_HUFF_20160707,
DV_SA_HUFF_20160707,
DV_COUNT
- };
-
- void Encode(TBuffer&, TStringBuf, EDictVersion dv = DV_SA_HUFF_20160707);
-
- void Decode(TBuffer&, TStringBuf);
-}
+ };
+
+ void Encode(TBuffer&, TStringBuf, EDictVersion dv = DV_SA_HUFF_20160707);
+
+ void Decode(TBuffer&, TStringBuf);
+}
diff --git a/library/cpp/codecs/static/example/ya.make b/library/cpp/codecs/static/example/ya.make
index ca6c5fd900..85dc222624 100644
--- a/library/cpp/codecs/static/example/ya.make
+++ b/library/cpp/codecs/static/example/ya.make
@@ -1,24 +1,24 @@
-LIBRARY()
-
-OWNER(velavokr)
-
-SRCS(
- GLOBAL example.cpp
-)
-
-PEERDIR(
+LIBRARY()
+
+OWNER(velavokr)
+
+SRCS(
+ GLOBAL example.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
-)
-
-ARCHIVE_ASM(
+)
+
+ARCHIVE_ASM(
"solar-8k-a.huffman.1467494385.codec_info"
NAME codec_info_sa_huff_20160707
-)
-
-ARCHIVE_ASM(
+)
+
+ARCHIVE_ASM(
"huffman.1467494385.codec_info"
NAME codec_info_huff_20160707
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/static.cpp b/library/cpp/codecs/static/static.cpp
index 44a07dd73a..d2c99a15ee 100644
--- a/library/cpp/codecs/static/static.cpp
+++ b/library/cpp/codecs/static/static.cpp
@@ -1,98 +1,98 @@
-#include "static.h"
-#include "common.h"
-
+#include "static.h"
+#include "common.h"
+
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/archive/yarchive.h>
-
-#include <util/draft/datetime.h>
-
-#include <util/string/builder.h>
-#include <util/stream/buffer.h>
-#include <util/stream/mem.h>
-#include <util/string/hex.h>
-#include <util/ysaveload.h>
-
-namespace NCodecs {
+
+#include <util/draft/datetime.h>
+
+#include <util/string/builder.h>
+#include <util/stream/buffer.h>
+#include <util/stream/mem.h>
+#include <util/string/hex.h>
+#include <util/ysaveload.h>
+
+namespace NCodecs {
static constexpr TStringBuf STATIC_CODEC_INFO_MAGIC = "CodecInf";
-
- static TStringBuf GetStaticCodecInfoMagic() {
+
+ static TStringBuf GetStaticCodecInfoMagic() {
return STATIC_CODEC_INFO_MAGIC;
- }
-
+ }
+
void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo& info) {
- TBufferOutput bout;
+ TBufferOutput bout;
info.SerializeToArcadiaStream(&bout);
- ui64 hash = DataSignature(bout.Buffer());
- out.Write(GetStaticCodecInfoMagic());
- ::Save(&out, hash);
- ::Save(&out, bout.Buffer());
- }
-
+ ui64 hash = DataSignature(bout.Buffer());
+ out.Write(GetStaticCodecInfoMagic());
+ ::Save(&out, hash);
+ ::Save(&out, bout.Buffer());
+ }
+
TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in) {
- {
- TBuffer magic;
+ {
+ TBuffer magic;
magic.Resize(GetStaticCodecInfoMagic().size());
Y_ENSURE_EX(in.Read(magic.Data(), GetStaticCodecInfoMagic().size()) == GetStaticCodecInfoMagic().size(),
- TCodecException() << "bad codec info");
+ TCodecException() << "bad codec info");
Y_ENSURE_EX(TStringBuf(magic.data(), magic.size()) == GetStaticCodecInfoMagic(),
- TCodecException() << "bad codec info");
- }
-
- ui64 hash;
- ::Load(&in, hash);
- TBuffer info;
- ::Load(&in, info);
- Y_ENSURE_EX(hash == DataSignature(info), TCodecException() << "bad codec info");
-
- TStaticCodecInfo result;
+ TCodecException() << "bad codec info");
+ }
+
+ ui64 hash;
+ ::Load(&in, hash);
+ TBuffer info;
+ ::Load(&in, info);
+ Y_ENSURE_EX(hash == DataSignature(info), TCodecException() << "bad codec info");
+
+ TStaticCodecInfo result;
Y_ENSURE_EX(result.ParseFromArray(info.data(), info.size()), TCodecException() << "bad codec info");
-
- return result;
- }
-
+
+ return result;
+ }
+
TString SaveCodecInfoToString(const TStaticCodecInfo& info) {
- TStringStream s;
- SaveCodecInfoToStream(s, info);
- return s.Str();
- }
-
- TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data) {
+ TStringStream s;
+ SaveCodecInfoToStream(s, info);
+ return s.Str();
+ }
+
+ TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data) {
TMemoryInput m{data.data(), data.size()};
- return LoadCodecInfoFromStream(m);
- }
-
+ return LoadCodecInfoFromStream(m);
+ }
+
TString FormatCodecInfo(const TStaticCodecInfo& ci) {
- TStringBuilder s;
- s << "codec name: " << ci.GetDebugInfo().GetCodecName() << Endl;
- s << "codec hash: " << HexWriteScalar(ci.GetDebugInfo().GetStoredCodecHash()) << Endl;
- s << "dict size: " << ci.GetStoredCodec().Size() << Endl;
- s << "sample mult: " << ci.GetDebugInfo().GetSampleSizeMultiplier() << Endl;
- s << "orig.compress: " << ci.GetDebugInfo().GetCompression() * 100 << " %" << Endl;
- s << "timestamp: " << ci.GetDebugInfo().GetTimestamp() << " ("
+ TStringBuilder s;
+ s << "codec name: " << ci.GetDebugInfo().GetCodecName() << Endl;
+ s << "codec hash: " << HexWriteScalar(ci.GetDebugInfo().GetStoredCodecHash()) << Endl;
+ s << "dict size: " << ci.GetStoredCodec().Size() << Endl;
+ s << "sample mult: " << ci.GetDebugInfo().GetSampleSizeMultiplier() << Endl;
+ s << "orig.compress: " << ci.GetDebugInfo().GetCompression() * 100 << " %" << Endl;
+ s << "timestamp: " << ci.GetDebugInfo().GetTimestamp() << " ("
<< NDatetime::TSimpleTM::NewLocal(ci.GetDebugInfo().GetTimestamp()).ToString()
<< ")" << Endl;
- s << "revision: " << ci.GetDebugInfo().GetRevisionInfo() << Endl;
- s << "training set comment: " << ci.GetDebugInfo().GetTrainingSetComment() << Endl;
- s << "training set resId: " << ci.GetDebugInfo().GetTrainingSetResId() << Endl;
- return s;
- }
-
+ s << "revision: " << ci.GetDebugInfo().GetRevisionInfo() << Endl;
+ s << "training set comment: " << ci.GetDebugInfo().GetTrainingSetComment() << Endl;
+ s << "training set resId: " << ci.GetDebugInfo().GetTrainingSetResId() << Endl;
+ return s;
+ }
+
TString LoadStringFromArchive(const ui8* begin, size_t size) {
- TArchiveReader ar(TBlob::NoCopy(begin, size));
- Y_VERIFY(ar.Count() == 1, "invalid number of entries");
- auto blob = ar.ObjectBlobByKey(ar.KeyByIndex(0));
+ TArchiveReader ar(TBlob::NoCopy(begin, size));
+ Y_VERIFY(ar.Count() == 1, "invalid number of entries");
+ auto blob = ar.ObjectBlobByKey(ar.KeyByIndex(0));
return TString{blob.AsCharPtr(), blob.Size()};
- }
-
- TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo& info) {
- return NCodecs::ICodec::RestoreFromString(info.GetStoredCodec());
- }
-
- TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size) {
- const auto& data = LoadStringFromArchive(begin, size);
- const auto& info = LoadCodecInfoFromString(data);
- const auto& codec = RestoreCodecFromCodecInfo(info);
- Y_ENSURE_EX(codec, TCodecException() << "null codec");
- return codec;
- }
-}
+ }
+
+ TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo& info) {
+ return NCodecs::ICodec::RestoreFromString(info.GetStoredCodec());
+ }
+
+ TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size) {
+ const auto& data = LoadStringFromArchive(begin, size);
+ const auto& info = LoadCodecInfoFromString(data);
+ const auto& codec = RestoreCodecFromCodecInfo(info);
+ Y_ENSURE_EX(codec, TCodecException() << "null codec");
+ return codec;
+ }
+}
diff --git a/library/cpp/codecs/static/static.h b/library/cpp/codecs/static/static.h
index c1eaed2a74..efa9c60c22 100644
--- a/library/cpp/codecs/static/static.h
+++ b/library/cpp/codecs/static/static.h
@@ -1,34 +1,34 @@
-#pragma once
-
+#pragma once
+
#include <library/cpp/codecs/codecs.h>
-
-#include <util/generic/strbuf.h>
+
+#include <util/generic/strbuf.h>
#include <util/generic/string.h>
#include <util/stream/output.h>
-
-namespace NCodecs {
- class TStaticCodecInfo;
-
- // load
-
- TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo&);
-
- TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data);
-
+
+namespace NCodecs {
+ class TStaticCodecInfo;
+
+ // load
+
+ TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo&);
+
+ TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data);
+
TString LoadStringFromArchive(const ui8* begin, size_t size);
-
- TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size);
-
- // save
-
+
+ TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size);
+
+ // save
+
TString SaveCodecInfoToString(const TStaticCodecInfo&);
-
+
void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo&);
-
- // misc
-
+
+ // misc
+
TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in);
-
+
TString FormatCodecInfo(const TStaticCodecInfo&);
-
-}
+
+}
diff --git a/library/cpp/codecs/static/static_codec_info.proto b/library/cpp/codecs/static/static_codec_info.proto
index 362abb4dad..178459784b 100644
--- a/library/cpp/codecs/static/static_codec_info.proto
+++ b/library/cpp/codecs/static/static_codec_info.proto
@@ -1,17 +1,17 @@
-package NCodecs;
-
-message TStaticCodecInfo {
- message TDebugInfo {
- optional string CodecName = 1; // the exact codec variant name
- optional uint64 Timestamp = 2; // when the codec was built
- optional string RevisionInfo = 3; // the arcadia revision info
- optional float SampleSizeMultiplier = 4; // how the default sample size was modified to improve compression
- optional float Compression = 5; // the compression on the training set ((raw_size - coded_size) / raw_size)
- optional string TrainingSetComment = 6; // a human readable description of the training set
- optional string TrainingSetResId = 7; // the training set sandbox resource id
- optional uint64 StoredCodecHash = 8; // cityhash64(data)
- }
-
- optional bytes StoredCodec = 1; // the data of the codec
- optional TDebugInfo DebugInfo = 2; // misc debug info which could be useful in finding whereabouts later
-}
+package NCodecs;
+
+message TStaticCodecInfo {
+ message TDebugInfo {
+ optional string CodecName = 1; // the exact codec variant name
+ optional uint64 Timestamp = 2; // when the codec was built
+ optional string RevisionInfo = 3; // the arcadia revision info
+ optional float SampleSizeMultiplier = 4; // how the default sample size was modified to improve compression
+ optional float Compression = 5; // the compression on the training set ((raw_size - coded_size) / raw_size)
+ optional string TrainingSetComment = 6; // a human readable description of the training set
+ optional string TrainingSetResId = 7; // the training set sandbox resource id
+ optional uint64 StoredCodecHash = 8; // cityhash64(data)
+ }
+
+ optional bytes StoredCodec = 1; // the data of the codec
+ optional TDebugInfo DebugInfo = 2; // misc debug info which could be useful in finding whereabouts later
+}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp
index fe77691280..cea40506e1 100644
--- a/library/cpp/codecs/static/tools/common/ct_common.cpp
+++ b/library/cpp/codecs/static/tools/common/ct_common.cpp
@@ -1,74 +1,74 @@
-#include "ct_common.h"
-
+#include "ct_common.h"
+
#include <library/cpp/codecs/codecs.h>
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/string_utils/base64/base64.h>
-
+
#include <util/stream/output.h>
-#include <util/string/builder.h>
-#include <util/system/hp_timer.h>
-
-namespace NCodecs {
+#include <util/string/builder.h>
+#include <util/system/hp_timer.h>
+
+namespace NCodecs {
TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
- TStringBuilder s;
- s << "raw size/item: " << RawSizePerRecord() << Endl;
- s << "enc.size/item: " << EncSizePerRecord() << Endl;
- if (checkMode) {
- s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
- }
- s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
- s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
- s << "dict size: " << info.GetStoredCodec().Size() << Endl;
- s << "compression: " << AsPercent(Compression()) << " %" << Endl;
- if (checkMode) {
- s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
- }
- return s;
- }
-
+ TStringBuilder s;
+ s << "raw size/item: " << RawSizePerRecord() << Endl;
+ s << "enc.size/item: " << EncSizePerRecord() << Endl;
+ if (checkMode) {
+ s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
+ }
+ s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
+ s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
+ s << "dict size: " << info.GetStoredCodec().Size() << Endl;
+ s << "compression: " << AsPercent(Compression()) << " %" << Endl;
+ if (checkMode) {
+ s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
+ }
+ return s;
+ }
+
TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
- TComprStats stats;
-
- TBuffer encodeBuffer;
- TBuffer decodeBuffer;
- for (const auto& data : input) {
- encodeBuffer.Clear();
- decodeBuffer.Clear();
-
- stats.Records += 1;
+ TComprStats stats;
+
+ TBuffer encodeBuffer;
+ TBuffer decodeBuffer;
+ for (const auto& data : input) {
+ encodeBuffer.Clear();
+ decodeBuffer.Clear();
+
+ stats.Records += 1;
stats.RawSize += data.size();
-
- THPTimer timer;
- c.Encode(data, encodeBuffer);
+
+ THPTimer timer;
+ c.Encode(data, encodeBuffer);
stats.EncSize += encodeBuffer.size();
- stats.EncSeconds += timer.PassedReset();
-
+ stats.EncSeconds += timer.PassedReset();
+
c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer);
- stats.DecSeconds += timer.PassedReset();
+ stats.DecSeconds += timer.PassedReset();
Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records);
- }
-
- return stats;
- }
-
+ }
+
+ return stats;
+ }
+
void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
TStringBuf bin(blob.AsCharPtr(), blob.Size());
- TStringBuf line;
+ TStringBuf line;
TString buffer;
- while (bin.ReadLine(line)) {
- if (DSF_BASE64_LF == fmt) {
- Base64Decode(line, buffer);
- line = buffer;
- }
- if (!line) {
- continue;
- }
+ while (bin.ReadLine(line)) {
+ if (DSF_BASE64_LF == fmt) {
+ Base64Decode(line, buffer);
+ line = buffer;
+ }
+ if (!line) {
+ continue;
+ }
result.emplace_back(line.data(), line.size());
- }
- }
-
+ }
+ }
+
TBlob GetInputBlob(const TString& dataFile) {
- return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
- }
-
-}
+ return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
+ }
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h
index 9d3dcbda93..de531b27e6 100644
--- a/library/cpp/codecs/static/tools/common/ct_common.h
+++ b/library/cpp/codecs/static/tools/common/ct_common.h
@@ -1,75 +1,75 @@
-#pragma once
-
+#pragma once
+
#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/memory/blob.h>
-#include <cmath>
-
-namespace NCodecs {
- class TStaticCodecInfo;
- class ICodec;
-
- struct TComprStats {
- double EncSeconds = 0;
- double DecSeconds = 0;
- size_t Records = 0;
- size_t RawSize = 0;
- size_t EncSize = 0;
-
- static double Round(double n, size_t decPlaces = 2) {
- double p = pow(10, decPlaces);
- return round(n * p) / p;
- }
-
- static double AsPercent(double n) {
- return Round(n * 100);
- }
-
- static double AsMicroSecond(double s) {
- return s * 1000000;
- }
-
- double PerRecord(double n) const {
- return Round((double)(Records ? n / Records : 0));
- }
-
- double Compression() const {
- return ((double)RawSize - (double)EncSize) / RawSize;
- }
-
- double EncTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(EncSeconds));
- }
-
- double DecTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(DecSeconds));
- }
-
- double RawSizePerRecord() const {
- return PerRecord(RawSize);
- }
-
- double EncSizePerRecord() const {
- return PerRecord(EncSize);
- }
-
- double OldEncSizePerRecord(double compr) const {
- return PerRecord((1 - compr) * RawSize);
- }
-
+#include <util/generic/vector.h>
+#include <util/memory/blob.h>
+#include <cmath>
+
+namespace NCodecs {
+ class TStaticCodecInfo;
+ class ICodec;
+
+ struct TComprStats {
+ double EncSeconds = 0;
+ double DecSeconds = 0;
+ size_t Records = 0;
+ size_t RawSize = 0;
+ size_t EncSize = 0;
+
+ static double Round(double n, size_t decPlaces = 2) {
+ double p = pow(10, decPlaces);
+ return round(n * p) / p;
+ }
+
+ static double AsPercent(double n) {
+ return Round(n * 100);
+ }
+
+ static double AsMicroSecond(double s) {
+ return s * 1000000;
+ }
+
+ double PerRecord(double n) const {
+ return Round((double)(Records ? n / Records : 0));
+ }
+
+ double Compression() const {
+ return ((double)RawSize - (double)EncSize) / RawSize;
+ }
+
+ double EncTimePerRecordUS() const {
+ return PerRecord(AsMicroSecond(EncSeconds));
+ }
+
+ double DecTimePerRecordUS() const {
+ return PerRecord(AsMicroSecond(DecSeconds));
+ }
+
+ double RawSizePerRecord() const {
+ return PerRecord(RawSize);
+ }
+
+ double EncSizePerRecord() const {
+ return PerRecord(EncSize);
+ }
+
+ double OldEncSizePerRecord(double compr) const {
+ return PerRecord((1 - compr) * RawSize);
+ }
+
TString Format(const TStaticCodecInfo&, bool checkMode) const;
- };
-
+ };
+
TComprStats TestCodec(const ICodec&, const TVector<TString>& data);
-
- enum EDataStreamFormat {
- DSF_NONE,
- DSF_PLAIN_LF /* "plain" */,
- DSF_BASE64_LF /* "base64" */,
- };
-
+
+ enum EDataStreamFormat {
+ DSF_NONE,
+ DSF_PLAIN_LF /* "plain" */,
+ DSF_BASE64_LF /* "base64" */,
+ };
+
void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&);
-
+
TBlob GetInputBlob(const TString& dataFile);
-
-}
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ya.make b/library/cpp/codecs/static/tools/common/ya.make
index d624222dad..5f575a2f28 100644
--- a/library/cpp/codecs/static/tools/common/ya.make
+++ b/library/cpp/codecs/static/tools/common/ya.make
@@ -1,19 +1,19 @@
-LIBRARY()
-
+LIBRARY()
+
OWNER(velavokr)
-
-SRCS(
- ct_common.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ ct_common.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/getopt/small
library/cpp/string_utils/base64
- util/draft
-)
-
+ util/draft
+)
+
GENERATE_ENUM_SERIALIZATION(ct_common.h)
-
-END()
+
+END()
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/README b/library/cpp/codecs/static/tools/static_codec_checker/README
index 723a68300b..c66703227d 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/README
+++ b/library/cpp/codecs/static/tools/static_codec_checker/README
@@ -1,4 +1,4 @@
This is a viewer for generated codec and utility for verification of the compression quality on a new data.
-
+
Usage:
-static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt
+static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
index 9c8d568d82..5ae901d8f8 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
+++ b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
@@ -3,25 +3,25 @@
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/codecs/codecs.h>
#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/digest/city.h>
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/stream/buffer.h>
-#include <util/stream/format.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecPtr codecPtr;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
+
+#include <util/digest/city.h>
+#include <util/generic/yexception.h>
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <util/stream/format.h>
+#include <util/string/builder.h>
+
+int main(int argc, char** argv) {
+ NCodecs::TCodecPtr codecPtr;
+ NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
TString codecFile;
- bool testCompression = false;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator.");
- opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt");
- NCodecs::TStaticCodecInfo codec;
-
+ bool testCompression = false;
+
+ auto opts = NLastGetopt::TOpts::Default();
+ opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator.");
+ opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt");
+ NCodecs::TStaticCodecInfo codec;
+
opts.AddLongOption('c', "codec-info").RequiredArgument("codec_info").Handler1T<TString>([&codecFile, &codec, &codecPtr](TString name) {
codecFile = name;
codec.CopyFrom(NCodecs::LoadCodecInfoFromString(TUnbufferedFileInput(name).ReadAll()));
@@ -29,45 +29,45 @@ int main(int argc, char** argv) {
})
.Required()
.Help(".codec_info file with serialized static data for codec");
-
+
opts.AddLongOption('t', "test").NoArgument().StoreValue(&testCompression, true).Optional().Help("test current performance");
-
+
opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Optional().Help("test set input file format");
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << codecFile << Endl;
- Cout << NCodecs::FormatCodecInfo(codec) << Endl;
-
- if (testCompression) {
- if (NCodecs::DSF_NONE == fmt) {
- Cerr << "Specify format (-f|--format) for testing set input" << Endl;
- exit(1);
- }
-
- Cout << "Reading testing set data ... " << Flush;
-
+
+ opts.SetFreeArgsMin(0);
+ opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files");
+
+ NLastGetopt::TOptsParseResult res(&opts, argc, argv);
+
+ Cout << codecFile << Endl;
+ Cout << NCodecs::FormatCodecInfo(codec) << Endl;
+
+ if (testCompression) {
+ if (NCodecs::DSF_NONE == fmt) {
+ Cerr << "Specify format (-f|--format) for testing set input" << Endl;
+ exit(1);
+ }
+
+ Cout << "Reading testing set data ... " << Flush;
+
TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
-
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- Cout << stats.Format(codec, true) << Endl;
- }
-}
+ for (const auto& freeArg : res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
+ }
+
+ if (!res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
+ }
+
+ Cout << "Done" << Endl << Endl;
+
+ Cout << "records: " << allData.size() << Endl;
+ Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
+
+ Cout << "Testing compression ... " << Flush;
+ auto stats = NCodecs::TestCodec(*codecPtr, allData);
+ Cout << "Done" << Endl << Endl;
+
+ Cout << stats.Format(codec, true) << Endl;
+ }
+}
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/ya.make b/library/cpp/codecs/static/tools/static_codec_checker/ya.make
index 90e06ca448..86b73dff6c 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/ya.make
+++ b/library/cpp/codecs/static/tools/static_codec_checker/ya.make
@@ -1,16 +1,16 @@
-PROGRAM()
-
+PROGRAM()
+
OWNER(velavokr)
-
-SRCS(
- static_codec_checker.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ static_codec_checker.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/codecs/static/tools/common
library/cpp/getopt/small
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README
index e6bb52b959..f0fffd745a 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/README
+++ b/library/cpp/codecs/static/tools/static_codec_generator/README
@@ -1,4 +1,4 @@
This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource.
-
+
Usage:
-static_codec_generator -t -m 'the training data description' -f plain samples.txt
+static_codec_generator -t -m 'the training data description' -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
index 45fdb5c5fe..b37a0f686d 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
+++ b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
@@ -2,81 +2,81 @@
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/codecs/static/builder.h>
#include <library/cpp/codecs/codecs.h>
-
+
#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecBuildInfo info;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
- opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
-
+
+#include <util/generic/yexception.h>
+#include <util/stream/file.h>
+#include <util/string/builder.h>
+
+int main(int argc, char** argv) {
+ NCodecs::TCodecBuildInfo info;
+ NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
+
+ auto opts = NLastGetopt::TOpts::Default();
+ opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
+ opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
+
opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set");
-
+
opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set");
-
+
opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName);
-
+
opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size");
-
+
opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format");
-
+
opts.AddLongOption("list-codecs").NoArgument().Handler0([]() {
Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl;
exit(0);
})
.Optional()
.Help("list available codecs");
-
+
opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info
-
+
opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << "Reading training set data ... " << Flush;
+
+ opts.SetFreeArgsMin(0);
+ opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
+
+ NLastGetopt::TOptsParseResult res(&opts, argc, argv);
+
+ Cout << "Reading training set data ... " << Flush;
TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
- auto codec = NCodecs::BuildStaticCodec(allData, info);
- Cout << "Done" << Endl;
-
+ for (const auto& freeArg : res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
+ }
+
+ if (!res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
+ }
+ Cout << "Done" << Endl << Endl;
+
+ Cout << "records: " << allData.size() << Endl;
+ Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
+
+ Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
+ auto codec = NCodecs::BuildStaticCodec(allData, info);
+ Cout << "Done" << Endl;
+
TString codecName = NCodecs::GetStandardFileName(codec);
- NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- codec.MutableDebugInfo()->SetCompression(stats.Compression());
-
- Cout << stats.Format(codec, false) << Endl;
-
- Cout << "Saving as " << codecName << " ... " << Flush;
- {
+ NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
+
+ Cout << "Testing compression ... " << Flush;
+ auto stats = NCodecs::TestCodec(*codecPtr, allData);
+ Cout << "Done" << Endl << Endl;
+
+ codec.MutableDebugInfo()->SetCompression(stats.Compression());
+
+ Cout << stats.Format(codec, false) << Endl;
+
+ Cout << "Saving as " << codecName << " ... " << Flush;
+ {
TUnbufferedFileOutput fout{codecName};
- NCodecs::SaveCodecInfoToStream(fout, codec);
- fout.Finish();
- }
- Cout << "Done" << Endl << Endl;
-}
+ NCodecs::SaveCodecInfoToStream(fout, codec);
+ fout.Finish();
+ }
+ Cout << "Done" << Endl << Endl;
+}
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/ya.make b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
index efbc440dd1..21750dde49 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/ya.make
+++ b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
@@ -1,17 +1,17 @@
-PROGRAM()
-
+PROGRAM()
+
OWNER(velavokr)
-
-SRCS(
- static_codec_generator.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ static_codec_generator.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/codecs/static/tools/common
library/cpp/digest/md5
library/cpp/getopt/small
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/tools/tests/static_codec_tools.py b/library/cpp/codecs/static/tools/tests/static_codec_tools.py
index db4140e370..a5baa262f7 100644
--- a/library/cpp/codecs/static/tools/tests/static_codec_tools.py
+++ b/library/cpp/codecs/static/tools/tests/static_codec_tools.py
@@ -1,18 +1,18 @@
-#!/usr/bin/env python
-
-import yatest.common as tt
-import os.path as op
-
-def test_static_codec_tools():
+#!/usr/bin/env python
+
+import yatest.common as tt
+import os.path as op
+
+def test_static_codec_tools():
tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator")]
- + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1",
- "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"],
- timeout=60)
- assert(op.exists("solar-8k-a.huffman.1467494385.codec_info"))
+ + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1",
+ "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"],
+ timeout=60)
+ assert(op.exists("solar-8k-a.huffman.1467494385.codec_info"))
tt.canonical_execute(tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker"),
- args=["-c", "solar-8k-a.huffman.1467494385.codec_info"],
- timeout=60)
+ args=["-c", "solar-8k-a.huffman.1467494385.codec_info"],
+ timeout=60)
tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker")]
- + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"],
- timeout=60)
- return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info")
+ + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"],
+ timeout=60)
+ return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info")
diff --git a/library/cpp/codecs/static/tools/tests/ya.make b/library/cpp/codecs/static/tools/tests/ya.make
index c5324eaf53..5555d90cae 100644
--- a/library/cpp/codecs/static/tools/tests/ya.make
+++ b/library/cpp/codecs/static/tools/tests/ya.make
@@ -1,20 +1,20 @@
PY2TEST()
-
-OWNER(velavokr)
-
-TEST_SRCS(static_codec_tools.py)
-
-DATA(sbr://143310406)
-
-TIMEOUT(4200)
-
+
+OWNER(velavokr)
+
+TEST_SRCS(static_codec_tools.py)
+
+DATA(sbr://143310406)
+
+TIMEOUT(4200)
+
TAG(ya:not_autocheck)
-DEPENDS(
+DEPENDS(
library/cpp/codecs/static/tools/static_codec_checker
library/cpp/codecs/static/tools/static_codec_generator
-)
-
-
+)
+
+
-END()
+END()
diff --git a/library/cpp/codecs/static/tools/ya.make b/library/cpp/codecs/static/tools/ya.make
index dd3e8437aa..ab72769153 100644
--- a/library/cpp/codecs/static/tools/ya.make
+++ b/library/cpp/codecs/static/tools/ya.make
@@ -1,5 +1,5 @@
-RECURSE(
- common
- static_codec_generator
- static_codec_checker
-)
+RECURSE(
+ common
+ static_codec_generator
+ static_codec_checker
+)
diff --git a/library/cpp/codecs/static/ut/builder_ut.cpp b/library/cpp/codecs/static/ut/builder_ut.cpp
index b47c279ed1..48d5c98d5d 100644
--- a/library/cpp/codecs/static/ut/builder_ut.cpp
+++ b/library/cpp/codecs/static/ut/builder_ut.cpp
@@ -1,57 +1,57 @@
#include <library/cpp/testing/unittest/registar.h>
#include <library/cpp/codecs/static/builder.h>
#include <library/cpp/codecs/static/static_codec_info.pb.h>
-#include <util/string/vector.h>
-
+#include <util/string/vector.h>
+
class TStaticCodecInfoBuilderTest: public NUnitTest::TTestBase {
- UNIT_TEST_SUITE(TStaticCodecInfoBuilderTest)
+ UNIT_TEST_SUITE(TStaticCodecInfoBuilderTest)
UNIT_TEST(TestBuild)
- UNIT_TEST_SUITE_END();
+ UNIT_TEST_SUITE_END();
-private:
+private:
TVector<TString> PrepareData() {
TVector<TString> data;
- for (ui32 i = 'a'; i <= 'z'; ++i) {
+ for (ui32 i = 'a'; i <= 'z'; ++i) {
data.push_back(TString(1, (char)i));
- }
- return data;
- }
-
- void TestBuild() {
+ }
+ return data;
+ }
+
+ void TestBuild() {
TVector<TString> data;
- NCodecs::TCodecBuildInfo info;
- info.CodecName = "huffman";
- info.SampleSizeMultiplier = 2;
- info.Timestamp = 1467494385;
- info.RevisionInfo = "r2385905";
- info.TrainingSetComment = "some dummy data";
- info.TrainingSetResId = "sbr://1234";
- auto res = NCodecs::BuildStaticCodec(PrepareData(), info);
- UNIT_ASSERT_VALUES_EQUAL(res.ShortUtf8DebugString(),
- "StoredCodec: \"\\007\\000huffman@S\\000a"
- "\\006b\\005c\\005d\\005e\\005f\\005g\\005h\\005i\\005j\\005k\\005l\\005m\\005n\\005o"
- "\\005p\\005q\\005r\\005s\\005t\\005u\\004v\\004w\\004x\\004y\\004z\\004\xC7?\xC8>"
- "\xC9=\xCA<\xCB;\xCC:\3159\3168\3177\3206\3215\3224\3233\3242\3251\3260\xD7/\xD8."
- "\xD9-\xDA,\xDB+\xDC*\xDD)\xDE(\xDF\\'\xE0&\xE1%\xE2$\xE3#\xE4\\\"\xE5!\xE6 \xE7"
- "\\037\xE8\\036\xE9\\035\xEA\\034\xEB\\033\xEC\\032\xED\\031\xEE\\030\xEF\\027\xF0"
- "\\026\xF1\\025\xF2\\024\xF3\\023\xF4\\022\xF5\\021\xF6\\020\xF7\\017\xF8\\016\xF9"
- "\\r\xFA\\014\xFB\\013\xFC\\n\xFD\\t\xFE\\010\xFF\\007\" "
- "DebugInfo { "
- "CodecName: \"huffman\" "
- "Timestamp: 1467494385 "
- "RevisionInfo: \"r2385905\" "
- "SampleSizeMultiplier: 2 "
- "TrainingSetComment: \"some dummy data\" "
- "TrainingSetResId: \"sbr://1234\" "
- "StoredCodecHash: 2509195835471488613 "
- "}");
-
- UNIT_ASSERT_VALUES_EQUAL(NCodecs::GetStandardFileName(res), "huffman.1467494385.codec_info");
- UNIT_ASSERT_VALUES_EQUAL(res.GetDebugInfo().GetStoredCodecHash(), 2509195835471488613ULL);
-
- auto res1 = NCodecs::LoadCodecInfoFromString(NCodecs::SaveCodecInfoToString(res));
- UNIT_ASSERT_VALUES_EQUAL(res1.ShortUtf8DebugString(), res.ShortUtf8DebugString());
- }
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TStaticCodecInfoBuilderTest);
+ NCodecs::TCodecBuildInfo info;
+ info.CodecName = "huffman";
+ info.SampleSizeMultiplier = 2;
+ info.Timestamp = 1467494385;
+ info.RevisionInfo = "r2385905";
+ info.TrainingSetComment = "some dummy data";
+ info.TrainingSetResId = "sbr://1234";
+ auto res = NCodecs::BuildStaticCodec(PrepareData(), info);
+ UNIT_ASSERT_VALUES_EQUAL(res.ShortUtf8DebugString(),
+ "StoredCodec: \"\\007\\000huffman@S\\000a"
+ "\\006b\\005c\\005d\\005e\\005f\\005g\\005h\\005i\\005j\\005k\\005l\\005m\\005n\\005o"
+ "\\005p\\005q\\005r\\005s\\005t\\005u\\004v\\004w\\004x\\004y\\004z\\004\xC7?\xC8>"
+ "\xC9=\xCA<\xCB;\xCC:\3159\3168\3177\3206\3215\3224\3233\3242\3251\3260\xD7/\xD8."
+ "\xD9-\xDA,\xDB+\xDC*\xDD)\xDE(\xDF\\'\xE0&\xE1%\xE2$\xE3#\xE4\\\"\xE5!\xE6 \xE7"
+ "\\037\xE8\\036\xE9\\035\xEA\\034\xEB\\033\xEC\\032\xED\\031\xEE\\030\xEF\\027\xF0"
+ "\\026\xF1\\025\xF2\\024\xF3\\023\xF4\\022\xF5\\021\xF6\\020\xF7\\017\xF8\\016\xF9"
+ "\\r\xFA\\014\xFB\\013\xFC\\n\xFD\\t\xFE\\010\xFF\\007\" "
+ "DebugInfo { "
+ "CodecName: \"huffman\" "
+ "Timestamp: 1467494385 "
+ "RevisionInfo: \"r2385905\" "
+ "SampleSizeMultiplier: 2 "
+ "TrainingSetComment: \"some dummy data\" "
+ "TrainingSetResId: \"sbr://1234\" "
+ "StoredCodecHash: 2509195835471488613 "
+ "}");
+
+ UNIT_ASSERT_VALUES_EQUAL(NCodecs::GetStandardFileName(res), "huffman.1467494385.codec_info");
+ UNIT_ASSERT_VALUES_EQUAL(res.GetDebugInfo().GetStoredCodecHash(), 2509195835471488613ULL);
+
+ auto res1 = NCodecs::LoadCodecInfoFromString(NCodecs::SaveCodecInfoToString(res));
+ UNIT_ASSERT_VALUES_EQUAL(res1.ShortUtf8DebugString(), res.ShortUtf8DebugString());
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStaticCodecInfoBuilderTest);
diff --git a/library/cpp/codecs/static/ut/static_ut.cpp b/library/cpp/codecs/static/ut/static_ut.cpp
index 57e1e62887..fda9ffcccb 100644
--- a/library/cpp/codecs/static/ut/static_ut.cpp
+++ b/library/cpp/codecs/static/ut/static_ut.cpp
@@ -1,27 +1,27 @@
#include <library/cpp/testing/unittest/registar.h>
#include <library/cpp/codecs/static/example/example.h>
-
+
class TStaticCodecUsageTest: public NUnitTest::TTestBase {
- UNIT_TEST_SUITE(TStaticCodecUsageTest)
+ UNIT_TEST_SUITE(TStaticCodecUsageTest)
UNIT_TEST(TestUsage)
- UNIT_TEST_SUITE_END();
+ UNIT_TEST_SUITE_END();
-private:
- void DoTestUsage(NStaticCodecExample::EDictVersion dv, size_t expectedSize) {
+private:
+ void DoTestUsage(NStaticCodecExample::EDictVersion dv, size_t expectedSize) {
const TStringBuf letov = "Всё идёт по плану";
-
- TBuffer outEnc, outDec;
- NStaticCodecExample::Encode(outEnc, letov, dv);
+
+ TBuffer outEnc, outDec;
+ NStaticCodecExample::Encode(outEnc, letov, dv);
NStaticCodecExample::Decode(outDec, TStringBuf{outEnc.data(), outEnc.size()});
-
- UNIT_ASSERT_VALUES_EQUAL(outEnc.Size(), expectedSize);
+
+ UNIT_ASSERT_VALUES_EQUAL(outEnc.Size(), expectedSize);
UNIT_ASSERT_EQUAL(TStringBuf(outDec.data(), outDec.size()), letov);
- }
-
- void TestUsage() {
- DoTestUsage(NStaticCodecExample::DV_HUFF_20160707, 18u);
- DoTestUsage(NStaticCodecExample::DV_SA_HUFF_20160707, 22u);
- }
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TStaticCodecUsageTest)
+ }
+
+ void TestUsage() {
+ DoTestUsage(NStaticCodecExample::DV_HUFF_20160707, 18u);
+ DoTestUsage(NStaticCodecExample::DV_SA_HUFF_20160707, 22u);
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStaticCodecUsageTest)
diff --git a/library/cpp/codecs/static/ut/ya.make b/library/cpp/codecs/static/ut/ya.make
index b9116097d8..5bb2017fac 100644
--- a/library/cpp/codecs/static/ut/ya.make
+++ b/library/cpp/codecs/static/ut/ya.make
@@ -1,14 +1,14 @@
UNITTEST_FOR(library/cpp/codecs/static)
-
-OWNER(velavokr)
-
-SRCS(
- builder_ut.cpp
- static_ut.cpp
-)
-
-PEERDIR(
+
+OWNER(velavokr)
+
+SRCS(
+ builder_ut.cpp
+ static_ut.cpp
+)
+
+PEERDIR(
library/cpp/codecs/static/example
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/ya.make b/library/cpp/codecs/static/ya.make
index 00e00fd8d4..a2698b9432 100644
--- a/library/cpp/codecs/static/ya.make
+++ b/library/cpp/codecs/static/ya.make
@@ -1,18 +1,18 @@
-LIBRARY()
-
-OWNER(velavokr)
-
-SRCS(
- builder.cpp
- static_codec_info.proto
- static.cpp
-)
-
-PEERDIR(
+LIBRARY()
+
+OWNER(velavokr)
+
+SRCS(
+ builder.cpp
+ static_codec_info.proto
+ static.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/archive
library/cpp/svnversion
- util/draft
-)
-
-END()
+ util/draft
+)
+
+END()