aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/static/tools
diff options
context:
space:
mode:
authorRuslan Kovalev <ruslan.a.kovalev@gmail.com>2022-02-10 16:46:44 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:44 +0300
commit59e19371de37995fcb36beb16cd6ec030af960bc (patch)
treefa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/static/tools
parent89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff)
downloadydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/static/tools')
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.cpp118
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.h140
-rw-r--r--library/cpp/codecs/static/tools/common/ya.make26
-rw-r--r--library/cpp/codecs/static/tools/static_codec_checker/README4
-rw-r--r--library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp114
-rw-r--r--library/cpp/codecs/static/tools/static_codec_checker/ya.make22
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/README4
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp122
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/ya.make22
-rw-r--r--library/cpp/codecs/static/tools/tests/static_codec_tools.py30
-rw-r--r--library/cpp/codecs/static/tools/tests/ya.make28
-rw-r--r--library/cpp/codecs/static/tools/ya.make10
12 files changed, 320 insertions, 320 deletions
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp
index fe776912805..cea40506e13 100644
--- a/library/cpp/codecs/static/tools/common/ct_common.cpp
+++ b/library/cpp/codecs/static/tools/common/ct_common.cpp
@@ -1,74 +1,74 @@
-#include "ct_common.h"
-
+#include "ct_common.h"
+
#include <library/cpp/codecs/codecs.h>
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/string_utils/base64/base64.h>
-
+
#include <util/stream/output.h>
-#include <util/string/builder.h>
-#include <util/system/hp_timer.h>
-
-namespace NCodecs {
+#include <util/string/builder.h>
+#include <util/system/hp_timer.h>
+
+namespace NCodecs {
TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
- TStringBuilder s;
- s << "raw size/item: " << RawSizePerRecord() << Endl;
- s << "enc.size/item: " << EncSizePerRecord() << Endl;
- if (checkMode) {
- s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
- }
- s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
- s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
- s << "dict size: " << info.GetStoredCodec().Size() << Endl;
- s << "compression: " << AsPercent(Compression()) << " %" << Endl;
- if (checkMode) {
- s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
- }
- return s;
- }
-
+ TStringBuilder s;
+ s << "raw size/item: " << RawSizePerRecord() << Endl;
+ s << "enc.size/item: " << EncSizePerRecord() << Endl;
+ if (checkMode) {
+ s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
+ }
+ s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
+ s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
+ s << "dict size: " << info.GetStoredCodec().Size() << Endl;
+ s << "compression: " << AsPercent(Compression()) << " %" << Endl;
+ if (checkMode) {
+ s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
+ }
+ return s;
+ }
+
TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
- TComprStats stats;
-
- TBuffer encodeBuffer;
- TBuffer decodeBuffer;
- for (const auto& data : input) {
- encodeBuffer.Clear();
- decodeBuffer.Clear();
-
- stats.Records += 1;
+ TComprStats stats;
+
+ TBuffer encodeBuffer;
+ TBuffer decodeBuffer;
+ for (const auto& data : input) {
+ encodeBuffer.Clear();
+ decodeBuffer.Clear();
+
+ stats.Records += 1;
stats.RawSize += data.size();
-
- THPTimer timer;
- c.Encode(data, encodeBuffer);
+
+ THPTimer timer;
+ c.Encode(data, encodeBuffer);
stats.EncSize += encodeBuffer.size();
- stats.EncSeconds += timer.PassedReset();
-
+ stats.EncSeconds += timer.PassedReset();
+
c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer);
- stats.DecSeconds += timer.PassedReset();
+ stats.DecSeconds += timer.PassedReset();
Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records);
- }
-
- return stats;
- }
-
+ }
+
+ return stats;
+ }
+
void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
TStringBuf bin(blob.AsCharPtr(), blob.Size());
- TStringBuf line;
+ TStringBuf line;
TString buffer;
- while (bin.ReadLine(line)) {
- if (DSF_BASE64_LF == fmt) {
- Base64Decode(line, buffer);
- line = buffer;
- }
- if (!line) {
- continue;
- }
+ while (bin.ReadLine(line)) {
+ if (DSF_BASE64_LF == fmt) {
+ Base64Decode(line, buffer);
+ line = buffer;
+ }
+ if (!line) {
+ continue;
+ }
result.emplace_back(line.data(), line.size());
- }
- }
-
+ }
+ }
+
TBlob GetInputBlob(const TString& dataFile) {
- return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
- }
-
-}
+ return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
+ }
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h
index 9d3dcbda934..de531b27e6b 100644
--- a/library/cpp/codecs/static/tools/common/ct_common.h
+++ b/library/cpp/codecs/static/tools/common/ct_common.h
@@ -1,75 +1,75 @@
-#pragma once
-
+#pragma once
+
#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/memory/blob.h>
-#include <cmath>
-
-namespace NCodecs {
- class TStaticCodecInfo;
- class ICodec;
-
- struct TComprStats {
- double EncSeconds = 0;
- double DecSeconds = 0;
- size_t Records = 0;
- size_t RawSize = 0;
- size_t EncSize = 0;
-
- static double Round(double n, size_t decPlaces = 2) {
- double p = pow(10, decPlaces);
- return round(n * p) / p;
- }
-
- static double AsPercent(double n) {
- return Round(n * 100);
- }
-
- static double AsMicroSecond(double s) {
- return s * 1000000;
- }
-
- double PerRecord(double n) const {
- return Round((double)(Records ? n / Records : 0));
- }
-
- double Compression() const {
- return ((double)RawSize - (double)EncSize) / RawSize;
- }
-
- double EncTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(EncSeconds));
- }
-
- double DecTimePerRecordUS() const {
- return PerRecord(AsMicroSecond(DecSeconds));
- }
-
- double RawSizePerRecord() const {
- return PerRecord(RawSize);
- }
-
- double EncSizePerRecord() const {
- return PerRecord(EncSize);
- }
-
- double OldEncSizePerRecord(double compr) const {
- return PerRecord((1 - compr) * RawSize);
- }
-
+#include <util/generic/vector.h>
+#include <util/memory/blob.h>
+#include <cmath>
+
+namespace NCodecs {
+ class TStaticCodecInfo;
+ class ICodec;
+
+ struct TComprStats {
+ double EncSeconds = 0;
+ double DecSeconds = 0;
+ size_t Records = 0;
+ size_t RawSize = 0;
+ size_t EncSize = 0;
+
+ static double Round(double n, size_t decPlaces = 2) {
+ double p = pow(10, decPlaces);
+ return round(n * p) / p;
+ }
+
+ static double AsPercent(double n) {
+ return Round(n * 100);
+ }
+
+ static double AsMicroSecond(double s) {
+ return s * 1000000;
+ }
+
+ double PerRecord(double n) const {
+ return Round((double)(Records ? n / Records : 0));
+ }
+
+ double Compression() const {
+ return ((double)RawSize - (double)EncSize) / RawSize;
+ }
+
+ double EncTimePerRecordUS() const {
+ return PerRecord(AsMicroSecond(EncSeconds));
+ }
+
+ double DecTimePerRecordUS() const {
+ return PerRecord(AsMicroSecond(DecSeconds));
+ }
+
+ double RawSizePerRecord() const {
+ return PerRecord(RawSize);
+ }
+
+ double EncSizePerRecord() const {
+ return PerRecord(EncSize);
+ }
+
+ double OldEncSizePerRecord(double compr) const {
+ return PerRecord((1 - compr) * RawSize);
+ }
+
TString Format(const TStaticCodecInfo&, bool checkMode) const;
- };
-
+ };
+
TComprStats TestCodec(const ICodec&, const TVector<TString>& data);
-
- enum EDataStreamFormat {
- DSF_NONE,
- DSF_PLAIN_LF /* "plain" */,
- DSF_BASE64_LF /* "base64" */,
- };
-
+
+ enum EDataStreamFormat {
+ DSF_NONE,
+ DSF_PLAIN_LF /* "plain" */,
+ DSF_BASE64_LF /* "base64" */,
+ };
+
void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&);
-
+
TBlob GetInputBlob(const TString& dataFile);
-
-}
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ya.make b/library/cpp/codecs/static/tools/common/ya.make
index d624222dad0..5f575a2f283 100644
--- a/library/cpp/codecs/static/tools/common/ya.make
+++ b/library/cpp/codecs/static/tools/common/ya.make
@@ -1,19 +1,19 @@
-LIBRARY()
-
+LIBRARY()
+
OWNER(velavokr)
-
-SRCS(
- ct_common.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ ct_common.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/getopt/small
library/cpp/string_utils/base64
- util/draft
-)
-
+ util/draft
+)
+
GENERATE_ENUM_SERIALIZATION(ct_common.h)
-
-END()
+
+END()
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/README b/library/cpp/codecs/static/tools/static_codec_checker/README
index 723a68300b0..c66703227d1 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/README
+++ b/library/cpp/codecs/static/tools/static_codec_checker/README
@@ -1,4 +1,4 @@
This is a viewer for generated codec and utility for verification of the compression quality on a new data.
-
+
Usage:
-static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt
+static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
index 9c8d568d823..5ae901d8f83 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
+++ b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp
@@ -3,25 +3,25 @@
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/codecs/codecs.h>
#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/digest/city.h>
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/stream/buffer.h>
-#include <util/stream/format.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecPtr codecPtr;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
+
+#include <util/digest/city.h>
+#include <util/generic/yexception.h>
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <util/stream/format.h>
+#include <util/string/builder.h>
+
+int main(int argc, char** argv) {
+ NCodecs::TCodecPtr codecPtr;
+ NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
TString codecFile;
- bool testCompression = false;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator.");
- opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt");
- NCodecs::TStaticCodecInfo codec;
-
+ bool testCompression = false;
+
+ auto opts = NLastGetopt::TOpts::Default();
+ opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator.");
+ opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt");
+ NCodecs::TStaticCodecInfo codec;
+
opts.AddLongOption('c', "codec-info").RequiredArgument("codec_info").Handler1T<TString>([&codecFile, &codec, &codecPtr](TString name) {
codecFile = name;
codec.CopyFrom(NCodecs::LoadCodecInfoFromString(TUnbufferedFileInput(name).ReadAll()));
@@ -29,45 +29,45 @@ int main(int argc, char** argv) {
})
.Required()
.Help(".codec_info file with serialized static data for codec");
-
+
opts.AddLongOption('t', "test").NoArgument().StoreValue(&testCompression, true).Optional().Help("test current performance");
-
+
opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Optional().Help("test set input file format");
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << codecFile << Endl;
- Cout << NCodecs::FormatCodecInfo(codec) << Endl;
-
- if (testCompression) {
- if (NCodecs::DSF_NONE == fmt) {
- Cerr << "Specify format (-f|--format) for testing set input" << Endl;
- exit(1);
- }
-
- Cout << "Reading testing set data ... " << Flush;
-
+
+ opts.SetFreeArgsMin(0);
+ opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files");
+
+ NLastGetopt::TOptsParseResult res(&opts, argc, argv);
+
+ Cout << codecFile << Endl;
+ Cout << NCodecs::FormatCodecInfo(codec) << Endl;
+
+ if (testCompression) {
+ if (NCodecs::DSF_NONE == fmt) {
+ Cerr << "Specify format (-f|--format) for testing set input" << Endl;
+ exit(1);
+ }
+
+ Cout << "Reading testing set data ... " << Flush;
+
TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
-
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- Cout << stats.Format(codec, true) << Endl;
- }
-}
+ for (const auto& freeArg : res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
+ }
+
+ if (!res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
+ }
+
+ Cout << "Done" << Endl << Endl;
+
+ Cout << "records: " << allData.size() << Endl;
+ Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
+
+ Cout << "Testing compression ... " << Flush;
+ auto stats = NCodecs::TestCodec(*codecPtr, allData);
+ Cout << "Done" << Endl << Endl;
+
+ Cout << stats.Format(codec, true) << Endl;
+ }
+}
diff --git a/library/cpp/codecs/static/tools/static_codec_checker/ya.make b/library/cpp/codecs/static/tools/static_codec_checker/ya.make
index 90e06ca448d..86b73dff6c4 100644
--- a/library/cpp/codecs/static/tools/static_codec_checker/ya.make
+++ b/library/cpp/codecs/static/tools/static_codec_checker/ya.make
@@ -1,16 +1,16 @@
-PROGRAM()
-
+PROGRAM()
+
OWNER(velavokr)
-
-SRCS(
- static_codec_checker.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ static_codec_checker.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/codecs/static/tools/common
library/cpp/getopt/small
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README
index e6bb52b9591..f0fffd745ad 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/README
+++ b/library/cpp/codecs/static/tools/static_codec_generator/README
@@ -1,4 +1,4 @@
This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource.
-
+
Usage:
-static_codec_generator -t -m 'the training data description' -f plain samples.txt
+static_codec_generator -t -m 'the training data description' -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
index 45fdb5c5fe8..b37a0f686d5 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
+++ b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
@@ -2,81 +2,81 @@
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/codecs/static/builder.h>
#include <library/cpp/codecs/codecs.h>
-
+
#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecBuildInfo info;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
- opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
-
+
+#include <util/generic/yexception.h>
+#include <util/stream/file.h>
+#include <util/string/builder.h>
+
+int main(int argc, char** argv) {
+ NCodecs::TCodecBuildInfo info;
+ NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
+
+ auto opts = NLastGetopt::TOpts::Default();
+ opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
+ opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
+
opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set");
-
+
opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set");
-
+
opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName);
-
+
opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size");
-
+
opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format");
-
+
opts.AddLongOption("list-codecs").NoArgument().Handler0([]() {
Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl;
exit(0);
})
.Optional()
.Help("list available codecs");
-
+
opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info
-
+
opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << "Reading training set data ... " << Flush;
+
+ opts.SetFreeArgsMin(0);
+ opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
+
+ NLastGetopt::TOptsParseResult res(&opts, argc, argv);
+
+ Cout << "Reading training set data ... " << Flush;
TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
- auto codec = NCodecs::BuildStaticCodec(allData, info);
- Cout << "Done" << Endl;
-
+ for (const auto& freeArg : res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
+ }
+
+ if (!res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
+ }
+ Cout << "Done" << Endl << Endl;
+
+ Cout << "records: " << allData.size() << Endl;
+ Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
+
+ Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
+ auto codec = NCodecs::BuildStaticCodec(allData, info);
+ Cout << "Done" << Endl;
+
TString codecName = NCodecs::GetStandardFileName(codec);
- NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- codec.MutableDebugInfo()->SetCompression(stats.Compression());
-
- Cout << stats.Format(codec, false) << Endl;
-
- Cout << "Saving as " << codecName << " ... " << Flush;
- {
+ NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
+
+ Cout << "Testing compression ... " << Flush;
+ auto stats = NCodecs::TestCodec(*codecPtr, allData);
+ Cout << "Done" << Endl << Endl;
+
+ codec.MutableDebugInfo()->SetCompression(stats.Compression());
+
+ Cout << stats.Format(codec, false) << Endl;
+
+ Cout << "Saving as " << codecName << " ... " << Flush;
+ {
TUnbufferedFileOutput fout{codecName};
- NCodecs::SaveCodecInfoToStream(fout, codec);
- fout.Finish();
- }
- Cout << "Done" << Endl << Endl;
-}
+ NCodecs::SaveCodecInfoToStream(fout, codec);
+ fout.Finish();
+ }
+ Cout << "Done" << Endl << Endl;
+}
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/ya.make b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
index efbc440dd18..21750dde49b 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/ya.make
+++ b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
@@ -1,17 +1,17 @@
-PROGRAM()
-
+PROGRAM()
+
OWNER(velavokr)
-
-SRCS(
- static_codec_generator.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ static_codec_generator.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/codecs/static/tools/common
library/cpp/digest/md5
library/cpp/getopt/small
-)
-
-END()
+)
+
+END()
diff --git a/library/cpp/codecs/static/tools/tests/static_codec_tools.py b/library/cpp/codecs/static/tools/tests/static_codec_tools.py
index db4140e3703..a5baa262f79 100644
--- a/library/cpp/codecs/static/tools/tests/static_codec_tools.py
+++ b/library/cpp/codecs/static/tools/tests/static_codec_tools.py
@@ -1,18 +1,18 @@
-#!/usr/bin/env python
-
-import yatest.common as tt
-import os.path as op
-
-def test_static_codec_tools():
+#!/usr/bin/env python
+
+import yatest.common as tt
+import os.path as op
+
+def test_static_codec_tools():
tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator")]
- + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1",
- "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"],
- timeout=60)
- assert(op.exists("solar-8k-a.huffman.1467494385.codec_info"))
+ + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1",
+ "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"],
+ timeout=60)
+ assert(op.exists("solar-8k-a.huffman.1467494385.codec_info"))
tt.canonical_execute(tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker"),
- args=["-c", "solar-8k-a.huffman.1467494385.codec_info"],
- timeout=60)
+ args=["-c", "solar-8k-a.huffman.1467494385.codec_info"],
+ timeout=60)
tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker")]
- + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"],
- timeout=60)
- return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info")
+ + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"],
+ timeout=60)
+ return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info")
diff --git a/library/cpp/codecs/static/tools/tests/ya.make b/library/cpp/codecs/static/tools/tests/ya.make
index c5324eaf53b..5555d90caed 100644
--- a/library/cpp/codecs/static/tools/tests/ya.make
+++ b/library/cpp/codecs/static/tools/tests/ya.make
@@ -1,20 +1,20 @@
PY2TEST()
-
-OWNER(velavokr)
-
-TEST_SRCS(static_codec_tools.py)
-
-DATA(sbr://143310406)
-
-TIMEOUT(4200)
-
+
+OWNER(velavokr)
+
+TEST_SRCS(static_codec_tools.py)
+
+DATA(sbr://143310406)
+
+TIMEOUT(4200)
+
TAG(ya:not_autocheck)
-DEPENDS(
+DEPENDS(
library/cpp/codecs/static/tools/static_codec_checker
library/cpp/codecs/static/tools/static_codec_generator
-)
-
-
+)
+
+
-END()
+END()
diff --git a/library/cpp/codecs/static/tools/ya.make b/library/cpp/codecs/static/tools/ya.make
index dd3e8437aa4..ab727691537 100644
--- a/library/cpp/codecs/static/tools/ya.make
+++ b/library/cpp/codecs/static/tools/ya.make
@@ -1,5 +1,5 @@
-RECURSE(
- common
- static_codec_generator
- static_codec_checker
-)
+RECURSE(
+ common
+ static_codec_generator
+ static_codec_checker
+)