summaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/static/tools/static_codec_generator
diff options
context:
space:
mode:
authorRuslan Kovalev <[email protected]>2022-02-10 16:46:44 +0300
committerDaniil Cherednik <[email protected]>2022-02-10 16:46:44 +0300
commit59e19371de37995fcb36beb16cd6ec030af960bc (patch)
treefa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/static/tools/static_codec_generator
parent89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff)
Restoring authorship annotation for Ruslan Kovalev <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/static/tools/static_codec_generator')
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/README4
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp122
-rw-r--r--library/cpp/codecs/static/tools/static_codec_generator/ya.make22
3 files changed, 74 insertions, 74 deletions
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README
index e6bb52b9591..f0fffd745ad 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/README
+++ b/library/cpp/codecs/static/tools/static_codec_generator/README
@@ -1,4 +1,4 @@
This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource.
-
+
Usage:
-static_codec_generator -t -m 'the training data description' -f plain samples.txt
+static_codec_generator -t -m 'the training data description' -f plain samples.txt
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
index 45fdb5c5fe8..b37a0f686d5 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
+++ b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp
@@ -2,81 +2,81 @@
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/codecs/static/builder.h>
#include <library/cpp/codecs/codecs.h>
-
+
#include <library/cpp/getopt/small/last_getopt.h>
-
-#include <util/generic/yexception.h>
-#include <util/stream/file.h>
-#include <util/string/builder.h>
-
-int main(int argc, char** argv) {
- NCodecs::TCodecBuildInfo info;
- NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
-
- auto opts = NLastGetopt::TOpts::Default();
- opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
- opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
-
+
+#include <util/generic/yexception.h>
+#include <util/stream/file.h>
+#include <util/string/builder.h>
+
+int main(int argc, char** argv) {
+ NCodecs::TCodecBuildInfo info;
+ NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE;
+
+ auto opts = NLastGetopt::TOpts::Default();
+ opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt");
+ opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin");
+
opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set");
-
+
opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set");
-
+
opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName);
-
+
opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size");
-
+
opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format");
-
+
opts.AddLongOption("list-codecs").NoArgument().Handler0([]() {
Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl;
exit(0);
})
.Optional()
.Help("list available codecs");
-
+
opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info
-
+
opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info
-
- opts.SetFreeArgsMin(0);
- opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
-
- NLastGetopt::TOptsParseResult res(&opts, argc, argv);
-
- Cout << "Reading training set data ... " << Flush;
+
+ opts.SetFreeArgsMin(0);
+ opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files");
+
+ NLastGetopt::TOptsParseResult res(&opts, argc, argv);
+
+ Cout << "Reading training set data ... " << Flush;
TVector<TString> allData;
- for (const auto& freeArg : res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
- }
-
- if (!res.GetFreeArgs()) {
- NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
- }
- Cout << "Done" << Endl << Endl;
-
- Cout << "records: " << allData.size() << Endl;
- Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
-
- Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
- auto codec = NCodecs::BuildStaticCodec(allData, info);
- Cout << "Done" << Endl;
-
+ for (const auto& freeArg : res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg));
+ }
+
+ if (!res.GetFreeArgs()) {
+ NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-"));
+ }
+ Cout << "Done" << Endl << Endl;
+
+ Cout << "records: " << allData.size() << Endl;
+ Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl;
+
+ Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush;
+ auto codec = NCodecs::BuildStaticCodec(allData, info);
+ Cout << "Done" << Endl;
+
TString codecName = NCodecs::GetStandardFileName(codec);
- NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
-
- Cout << "Testing compression ... " << Flush;
- auto stats = NCodecs::TestCodec(*codecPtr, allData);
- Cout << "Done" << Endl << Endl;
-
- codec.MutableDebugInfo()->SetCompression(stats.Compression());
-
- Cout << stats.Format(codec, false) << Endl;
-
- Cout << "Saving as " << codecName << " ... " << Flush;
- {
+ NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec());
+
+ Cout << "Testing compression ... " << Flush;
+ auto stats = NCodecs::TestCodec(*codecPtr, allData);
+ Cout << "Done" << Endl << Endl;
+
+ codec.MutableDebugInfo()->SetCompression(stats.Compression());
+
+ Cout << stats.Format(codec, false) << Endl;
+
+ Cout << "Saving as " << codecName << " ... " << Flush;
+ {
TUnbufferedFileOutput fout{codecName};
- NCodecs::SaveCodecInfoToStream(fout, codec);
- fout.Finish();
- }
- Cout << "Done" << Endl << Endl;
-}
+ NCodecs::SaveCodecInfoToStream(fout, codec);
+ fout.Finish();
+ }
+ Cout << "Done" << Endl << Endl;
+}
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/ya.make b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
index efbc440dd18..21750dde49b 100644
--- a/library/cpp/codecs/static/tools/static_codec_generator/ya.make
+++ b/library/cpp/codecs/static/tools/static_codec_generator/ya.make
@@ -1,17 +1,17 @@
-PROGRAM()
-
+PROGRAM()
+
OWNER(velavokr)
-
-SRCS(
- static_codec_generator.cpp
-)
-
-PEERDIR(
+
+SRCS(
+ static_codec_generator.cpp
+)
+
+PEERDIR(
library/cpp/codecs
library/cpp/codecs/static
library/cpp/codecs/static/tools/common
library/cpp/digest/md5
library/cpp/getopt/small
-)
-
-END()
+)
+
+END()