diff options
| author | monster <[email protected]> | 2022-07-07 14:41:37 +0300 |
|---|---|---|
| committer | monster <[email protected]> | 2022-07-07 14:41:37 +0300 |
| commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
| tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/codecs/static/tools | |
| parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
fix ya.make
Diffstat (limited to 'library/cpp/codecs/static/tools')
8 files changed, 0 insertions, 336 deletions
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp deleted file mode 100644 index fe776912805..00000000000 --- a/library/cpp/codecs/static/tools/common/ct_common.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "ct_common.h" - -#include <library/cpp/codecs/codecs.h> -#include <library/cpp/codecs/static/static_codec_info.pb.h> -#include <library/cpp/string_utils/base64/base64.h> - -#include <util/stream/output.h> -#include <util/string/builder.h> -#include <util/system/hp_timer.h> - -namespace NCodecs { - TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const { - TStringBuilder s; - s << "raw size/item: " << RawSizePerRecord() << Endl; - s << "enc.size/item: " << EncSizePerRecord() << Endl; - if (checkMode) { - s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl; - } - s << "enc time us/item: " << EncTimePerRecordUS() << Endl; - s << "dec time us/item: " << DecTimePerRecordUS() << Endl; - s << "dict size: " << info.GetStoredCodec().Size() << Endl; - s << "compression: " << AsPercent(Compression()) << " %" << Endl; - if (checkMode) { - s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl; - } - return s; - } - - TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) { - TComprStats stats; - - TBuffer encodeBuffer; - TBuffer decodeBuffer; - for (const auto& data : input) { - encodeBuffer.Clear(); - decodeBuffer.Clear(); - - stats.Records += 1; - stats.RawSize += data.size(); - - THPTimer timer; - c.Encode(data, encodeBuffer); - stats.EncSize += encodeBuffer.size(); - stats.EncSeconds += timer.PassedReset(); - - c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer); - stats.DecSeconds += timer.PassedReset(); - Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records); - } - - return stats; - } - - void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) { - TStringBuf bin(blob.AsCharPtr(), blob.Size()); - TStringBuf line; - TString buffer; - while (bin.ReadLine(line)) { - if (DSF_BASE64_LF == fmt) { - Base64Decode(line, buffer); - line = buffer; - } - if (!line) { - continue; - } - result.emplace_back(line.data(), line.size()); - } - } - - TBlob GetInputBlob(const TString& dataFile) { - return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin); - } - -} diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h deleted file mode 100644 index 9d3dcbda934..00000000000 --- a/library/cpp/codecs/static/tools/common/ct_common.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/memory/blob.h> -#include <cmath> - -namespace NCodecs { - class TStaticCodecInfo; - class ICodec; - - struct TComprStats { - double EncSeconds = 0; - double DecSeconds = 0; - size_t Records = 0; - size_t RawSize = 0; - size_t EncSize = 0; - - static double Round(double n, size_t decPlaces = 2) { - double p = pow(10, decPlaces); - return round(n * p) / p; - } - - static double AsPercent(double n) { - return Round(n * 100); - } - - static double AsMicroSecond(double s) { - return s * 1000000; - } - - double PerRecord(double n) const { - return Round((double)(Records ? n / Records : 0)); - } - - double Compression() const { - return ((double)RawSize - (double)EncSize) / RawSize; - } - - double EncTimePerRecordUS() const { - return PerRecord(AsMicroSecond(EncSeconds)); - } - - double DecTimePerRecordUS() const { - return PerRecord(AsMicroSecond(DecSeconds)); - } - - double RawSizePerRecord() const { - return PerRecord(RawSize); - } - - double EncSizePerRecord() const { - return PerRecord(EncSize); - } - - double OldEncSizePerRecord(double compr) const { - return PerRecord((1 - compr) * RawSize); - } - - TString Format(const TStaticCodecInfo&, bool checkMode) const; - }; - - TComprStats TestCodec(const ICodec&, const TVector<TString>& data); - - enum EDataStreamFormat { - DSF_NONE, - DSF_PLAIN_LF /* "plain" */, - DSF_BASE64_LF /* "base64" */, - }; - - void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&); - - TBlob GetInputBlob(const TString& dataFile); - -} diff --git a/library/cpp/codecs/static/tools/static_codec_checker/README b/library/cpp/codecs/static/tools/static_codec_checker/README deleted file mode 100644 index 723a68300b0..00000000000 --- a/library/cpp/codecs/static/tools/static_codec_checker/README +++ /dev/null @@ -1,4 +0,0 @@ -This is a viewer for generated codec and utility for verification of the compression quality on a new data. - -Usage: -static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt diff --git a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp deleted file mode 100644 index 9c8d568d823..00000000000 --- a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include <library/cpp/codecs/static/tools/common/ct_common.h> -#include <library/cpp/codecs/static/static.h> -#include <library/cpp/codecs/static/static_codec_info.pb.h> -#include <library/cpp/codecs/codecs.h> -#include <library/cpp/getopt/small/last_getopt.h> - -#include <util/digest/city.h> -#include <util/generic/yexception.h> -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <util/stream/format.h> -#include <util/string/builder.h> - -int main(int argc, char** argv) { - NCodecs::TCodecPtr codecPtr; - NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; - TString codecFile; - bool testCompression = false; - - auto opts = NLastGetopt::TOpts::Default(); - opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator."); - opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt"); - NCodecs::TStaticCodecInfo codec; - - opts.AddLongOption('c', "codec-info").RequiredArgument("codec_info").Handler1T<TString>([&codecFile, &codec, &codecPtr](TString name) { - codecFile = name; - codec.CopyFrom(NCodecs::LoadCodecInfoFromString(TUnbufferedFileInput(name).ReadAll())); - codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); - }) - .Required() - .Help(".codec_info file with serialized static data for codec"); - - opts.AddLongOption('t', "test").NoArgument().StoreValue(&testCompression, true).Optional().Help("test current performance"); - - opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Optional().Help("test set input file format"); - - opts.SetFreeArgsMin(0); - opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files"); - - NLastGetopt::TOptsParseResult res(&opts, argc, argv); - - Cout << codecFile << Endl; - Cout << NCodecs::FormatCodecInfo(codec) << Endl; - - if (testCompression) { - if (NCodecs::DSF_NONE == fmt) { - Cerr << "Specify format (-f|--format) for testing set input" << Endl; - exit(1); - } - - Cout << "Reading testing set data ... " << Flush; - - TVector<TString> allData; - for (const auto& freeArg : res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); - } - - if (!res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); - } - - Cout << "Done" << Endl << Endl; - - Cout << "records: " << allData.size() << Endl; - Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; - - Cout << "Testing compression ... " << Flush; - auto stats = NCodecs::TestCodec(*codecPtr, allData); - Cout << "Done" << Endl << Endl; - - Cout << stats.Format(codec, true) << Endl; - } -} diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README deleted file mode 100644 index e6bb52b9591..00000000000 --- a/library/cpp/codecs/static/tools/static_codec_generator/README +++ /dev/null @@ -1,4 +0,0 @@ -This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource. - -Usage: -static_codec_generator -t -m 'the training data description' -f plain samples.txt diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp deleted file mode 100644 index 45fdb5c5fe8..00000000000 --- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include <library/cpp/codecs/static/tools/common/ct_common.h> -#include <library/cpp/codecs/static/static_codec_info.pb.h> -#include <library/cpp/codecs/static/builder.h> -#include <library/cpp/codecs/codecs.h> - -#include <library/cpp/getopt/small/last_getopt.h> - -#include <util/generic/yexception.h> -#include <util/stream/file.h> -#include <util/string/builder.h> - -int main(int argc, char** argv) { - NCodecs::TCodecBuildInfo info; - NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; - - auto opts = NLastGetopt::TOpts::Default(); - opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt"); - opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin"); - - opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set"); - - opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set"); - - opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName); - - opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size"); - - opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format"); - - opts.AddLongOption("list-codecs").NoArgument().Handler0([]() { - Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl; - exit(0); - }) - .Optional() - .Help("list available codecs"); - - opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info - - opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info - - opts.SetFreeArgsMin(0); - opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files"); - - NLastGetopt::TOptsParseResult res(&opts, argc, argv); - - Cout << "Reading training set data ... " << Flush; - TVector<TString> allData; - for (const auto& freeArg : res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); - } - - if (!res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); - } - Cout << "Done" << Endl << Endl; - - Cout << "records: " << allData.size() << Endl; - Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; - - Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush; - auto codec = NCodecs::BuildStaticCodec(allData, info); - Cout << "Done" << Endl; - - TString codecName = NCodecs::GetStandardFileName(codec); - NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); - - Cout << "Testing compression ... " << Flush; - auto stats = NCodecs::TestCodec(*codecPtr, allData); - Cout << "Done" << Endl << Endl; - - codec.MutableDebugInfo()->SetCompression(stats.Compression()); - - Cout << stats.Format(codec, false) << Endl; - - Cout << "Saving as " << codecName << " ... " << Flush; - { - TUnbufferedFileOutput fout{codecName}; - NCodecs::SaveCodecInfoToStream(fout, codec); - fout.Finish(); - } - Cout << "Done" << Endl << Endl; -} diff --git a/library/cpp/codecs/static/tools/tests/canondata/result.json b/library/cpp/codecs/static/tools/tests/canondata/result.json deleted file mode 100644 index 7a637c6763a..00000000000 --- a/library/cpp/codecs/static/tools/tests/canondata/result.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "static_codec_tools.test_static_codec_tools": { - "checksum": "960e3c8c57fb846ab53ccbd07e287233", - "uri": "sbr://144512644/static_codec_tools.test_static_codec_tools/solar-8k-a.huffman.1467494385.codec_info" - } -}
\ No newline at end of file diff --git a/library/cpp/codecs/static/tools/tests/static_codec_tools.py b/library/cpp/codecs/static/tools/tests/static_codec_tools.py deleted file mode 100644 index db4140e3703..00000000000 --- a/library/cpp/codecs/static/tools/tests/static_codec_tools.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python - -import yatest.common as tt -import os.path as op - -def test_static_codec_tools(): - tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator")] - + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1", - "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"], - timeout=60) - assert(op.exists("solar-8k-a.huffman.1467494385.codec_info")) - tt.canonical_execute(tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker"), - args=["-c", "solar-8k-a.huffman.1467494385.codec_info"], - timeout=60) - tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker")] - + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"], - timeout=60) - return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info") |
