diff options
author | Ruslan Kovalev <[email protected]> | 2022-02-10 16:46:44 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:46:44 +0300 |
commit | 59e19371de37995fcb36beb16cd6ec030af960bc (patch) | |
tree | fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/static/tools/static_codec_generator | |
parent | 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff) |
Restoring authorship annotation for Ruslan Kovalev <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/static/tools/static_codec_generator')
3 files changed, 74 insertions, 74 deletions
diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README index e6bb52b9591..f0fffd745ad 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/README +++ b/library/cpp/codecs/static/tools/static_codec_generator/README @@ -1,4 +1,4 @@ This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource. - + Usage: -static_codec_generator -t -m 'the training data description' -f plain samples.txt +static_codec_generator -t -m 'the training data description' -f plain samples.txt diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp index 45fdb5c5fe8..b37a0f686d5 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp +++ b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp @@ -2,81 +2,81 @@ #include <library/cpp/codecs/static/static_codec_info.pb.h> #include <library/cpp/codecs/static/builder.h> #include <library/cpp/codecs/codecs.h> - + #include <library/cpp/getopt/small/last_getopt.h> - -#include <util/generic/yexception.h> -#include <util/stream/file.h> -#include <util/string/builder.h> - -int main(int argc, char** argv) { - NCodecs::TCodecBuildInfo info; - NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; - - auto opts = NLastGetopt::TOpts::Default(); - opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt"); - opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin"); - + +#include <util/generic/yexception.h> +#include <util/stream/file.h> +#include <util/string/builder.h> + +int main(int argc, char** argv) { + NCodecs::TCodecBuildInfo info; + NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; + + auto opts = NLastGetopt::TOpts::Default(); + opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt"); + opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin"); + opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set"); - + opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set"); - + opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName); - + opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size"); - + opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format"); - + opts.AddLongOption("list-codecs").NoArgument().Handler0([]() { Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl; exit(0); }) .Optional() .Help("list available codecs"); - + opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info - + opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info - - opts.SetFreeArgsMin(0); - opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files"); - - NLastGetopt::TOptsParseResult res(&opts, argc, argv); - - Cout << "Reading training set data ... " << Flush; + + opts.SetFreeArgsMin(0); + opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files"); + + NLastGetopt::TOptsParseResult res(&opts, argc, argv); + + Cout << "Reading training set data ... " << Flush; TVector<TString> allData; - for (const auto& freeArg : res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); - } - - if (!res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); - } - Cout << "Done" << Endl << Endl; - - Cout << "records: " << allData.size() << Endl; - Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; - - Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush; - auto codec = NCodecs::BuildStaticCodec(allData, info); - Cout << "Done" << Endl; - + for (const auto& freeArg : res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); + } + + if (!res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); + } + Cout << "Done" << Endl << Endl; + + Cout << "records: " << allData.size() << Endl; + Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; + + Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush; + auto codec = NCodecs::BuildStaticCodec(allData, info); + Cout << "Done" << Endl; + TString codecName = NCodecs::GetStandardFileName(codec); - NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); - - Cout << "Testing compression ... " << Flush; - auto stats = NCodecs::TestCodec(*codecPtr, allData); - Cout << "Done" << Endl << Endl; - - codec.MutableDebugInfo()->SetCompression(stats.Compression()); - - Cout << stats.Format(codec, false) << Endl; - - Cout << "Saving as " << codecName << " ... " << Flush; - { + NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); + + Cout << "Testing compression ... " << Flush; + auto stats = NCodecs::TestCodec(*codecPtr, allData); + Cout << "Done" << Endl << Endl; + + codec.MutableDebugInfo()->SetCompression(stats.Compression()); + + Cout << stats.Format(codec, false) << Endl; + + Cout << "Saving as " << codecName << " ... " << Flush; + { TUnbufferedFileOutput fout{codecName}; - NCodecs::SaveCodecInfoToStream(fout, codec); - fout.Finish(); - } - Cout << "Done" << Endl << Endl; -} + NCodecs::SaveCodecInfoToStream(fout, codec); + fout.Finish(); + } + Cout << "Done" << Endl << Endl; +} diff --git a/library/cpp/codecs/static/tools/static_codec_generator/ya.make b/library/cpp/codecs/static/tools/static_codec_generator/ya.make index efbc440dd18..21750dde49b 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/ya.make +++ b/library/cpp/codecs/static/tools/static_codec_generator/ya.make @@ -1,17 +1,17 @@ -PROGRAM() - +PROGRAM() + OWNER(velavokr) - -SRCS( - static_codec_generator.cpp -) - -PEERDIR( + +SRCS( + static_codec_generator.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/codecs/static library/cpp/codecs/static/tools/common library/cpp/digest/md5 library/cpp/getopt/small -) - -END() +) + +END() |