diff options
author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/static/tools/common |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/static/tools/common')
-rw-r--r-- | library/cpp/codecs/static/tools/common/ct_common.cpp | 74 | ||||
-rw-r--r-- | library/cpp/codecs/static/tools/common/ct_common.h | 75 | ||||
-rw-r--r-- | library/cpp/codecs/static/tools/common/ya.make | 19 |
3 files changed, 168 insertions, 0 deletions
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp new file mode 100644 index 00000000000..fe776912805 --- /dev/null +++ b/library/cpp/codecs/static/tools/common/ct_common.cpp @@ -0,0 +1,74 @@ +#include "ct_common.h" + +#include <library/cpp/codecs/codecs.h> +#include <library/cpp/codecs/static/static_codec_info.pb.h> +#include <library/cpp/string_utils/base64/base64.h> + +#include <util/stream/output.h> +#include <util/string/builder.h> +#include <util/system/hp_timer.h> + +namespace NCodecs { + TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const { + TStringBuilder s; + s << "raw size/item: " << RawSizePerRecord() << Endl; + s << "enc.size/item: " << EncSizePerRecord() << Endl; + if (checkMode) { + s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl; + } + s << "enc time us/item: " << EncTimePerRecordUS() << Endl; + s << "dec time us/item: " << DecTimePerRecordUS() << Endl; + s << "dict size: " << info.GetStoredCodec().Size() << Endl; + s << "compression: " << AsPercent(Compression()) << " %" << Endl; + if (checkMode) { + s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl; + } + return s; + } + + TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) { + TComprStats stats; + + TBuffer encodeBuffer; + TBuffer decodeBuffer; + for (const auto& data : input) { + encodeBuffer.Clear(); + decodeBuffer.Clear(); + + stats.Records += 1; + stats.RawSize += data.size(); + + THPTimer timer; + c.Encode(data, encodeBuffer); + stats.EncSize += encodeBuffer.size(); + stats.EncSeconds += timer.PassedReset(); + + c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer); + stats.DecSeconds += timer.PassedReset(); + Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records); + } + + return stats; + } + + void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) { + TStringBuf bin(blob.AsCharPtr(), blob.Size()); + TStringBuf line; + TString buffer; + while (bin.ReadLine(line)) { + if (DSF_BASE64_LF == fmt) { + Base64Decode(line, buffer); + line = buffer; + } + if (!line) { + continue; + } + result.emplace_back(line.data(), line.size()); + } + } + + TBlob GetInputBlob(const TString& dataFile) { + return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin); + } + +} diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h new file mode 100644 index 00000000000..9d3dcbda934 --- /dev/null +++ b/library/cpp/codecs/static/tools/common/ct_common.h @@ -0,0 +1,75 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/memory/blob.h> +#include <cmath> + +namespace NCodecs { + class TStaticCodecInfo; + class ICodec; + + struct TComprStats { + double EncSeconds = 0; + double DecSeconds = 0; + size_t Records = 0; + size_t RawSize = 0; + size_t EncSize = 0; + + static double Round(double n, size_t decPlaces = 2) { + double p = pow(10, decPlaces); + return round(n * p) / p; + } + + static double AsPercent(double n) { + return Round(n * 100); + } + + static double AsMicroSecond(double s) { + return s * 1000000; + } + + double PerRecord(double n) const { + return Round((double)(Records ? n / Records : 0)); + } + + double Compression() const { + return ((double)RawSize - (double)EncSize) / RawSize; + } + + double EncTimePerRecordUS() const { + return PerRecord(AsMicroSecond(EncSeconds)); + } + + double DecTimePerRecordUS() const { + return PerRecord(AsMicroSecond(DecSeconds)); + } + + double RawSizePerRecord() const { + return PerRecord(RawSize); + } + + double EncSizePerRecord() const { + return PerRecord(EncSize); + } + + double OldEncSizePerRecord(double compr) const { + return PerRecord((1 - compr) * RawSize); + } + + TString Format(const TStaticCodecInfo&, bool checkMode) const; + }; + + TComprStats TestCodec(const ICodec&, const TVector<TString>& data); + + enum EDataStreamFormat { + DSF_NONE, + DSF_PLAIN_LF /* "plain" */, + DSF_BASE64_LF /* "base64" */, + }; + + void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&); + + TBlob GetInputBlob(const TString& dataFile); + +} diff --git a/library/cpp/codecs/static/tools/common/ya.make b/library/cpp/codecs/static/tools/common/ya.make new file mode 100644 index 00000000000..d624222dad0 --- /dev/null +++ b/library/cpp/codecs/static/tools/common/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +OWNER(velavokr) + +SRCS( + ct_common.cpp +) + +PEERDIR( + library/cpp/codecs + library/cpp/codecs/static + library/cpp/getopt/small + library/cpp/string_utils/base64 + util/draft +) + +GENERATE_ENUM_SERIALIZATION(ct_common.h) + +END() |