summaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/static/tools/common
diff options
context:
space:
mode:
authorDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/static/tools/common
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/static/tools/common')
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.cpp74
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.h75
-rw-r--r--library/cpp/codecs/static/tools/common/ya.make19
3 files changed, 168 insertions, 0 deletions
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp
new file mode 100644
index 00000000000..fe776912805
--- /dev/null
+++ b/library/cpp/codecs/static/tools/common/ct_common.cpp
@@ -0,0 +1,74 @@
+#include "ct_common.h"
+
+#include <library/cpp/codecs/codecs.h>
+#include <library/cpp/codecs/static/static_codec_info.pb.h>
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/stream/output.h>
+#include <util/string/builder.h>
+#include <util/system/hp_timer.h>
+
+namespace NCodecs {
+ // Renders the collected statistics as a multi-line, human-readable report.
+ // When checkMode is set, the compression value stored in info's debug section
+ // is reported next to the freshly measured figures for comparison.
+ TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
+     TStringBuilder report;
+     report << "raw size/item: " << RawSizePerRecord() << Endl
+            << "enc.size/item: " << EncSizePerRecord() << Endl;
+     if (checkMode) {
+         const auto storedCompression = info.GetDebugInfo().GetCompression();
+         report << "orig.enc.size/item: " << OldEncSizePerRecord(storedCompression) << Endl;
+     }
+     report << "enc time us/item: " << EncTimePerRecordUS() << Endl
+            << "dec time us/item: " << DecTimePerRecordUS() << Endl
+            << "dict size: " << info.GetStoredCodec().Size() << Endl
+            << "compression: " << AsPercent(Compression()) << " %" << Endl;
+     if (checkMode) {
+         const auto storedCompression = info.GetDebugInfo().GetCompression();
+         report << "orig.compression: " << AsPercent(storedCompression) << " %" << Endl;
+     }
+     return report;
+ }
+
+ // Round-trips every record of input through the codec, accumulating record
+ // count, raw/encoded sizes and the THPTimer readings for encode and decode.
+ // Fails via Y_ENSURE when a decoded record differs from its source.
+ TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
+     TComprStats totals;
+
+     // Both buffers are reused across records; Clear() is called before each use.
+     TBuffer packed;
+     TBuffer unpacked;
+     for (const TString& record : input) {
+         packed.Clear();
+         unpacked.Clear();
+
+         ++totals.Records;
+         totals.RawSize += record.size();
+
+         THPTimer stopwatch;
+         c.Encode(record, packed);
+         totals.EncSize += packed.size();
+         totals.EncSeconds += stopwatch.PassedReset();
+
+         const TStringBuf packedView{packed.data(), packed.size()};
+         c.Decode(packedView, unpacked);
+         totals.DecSeconds += stopwatch.PassedReset();
+
+         // The comparison happens after the timer reading, so it is not timed.
+         const TStringBuf roundTrip(unpacked.data(), unpacked.size());
+         Y_ENSURE(record == roundTrip, "invalid encoding at record " << totals.Records);
+     }
+
+     return totals;
+ }
+
+ // Splits blob into lines and appends each non-empty record to result.
+ // For DSF_BASE64_LF every line is first passed through Base64Decode; for any
+ // other format the line is taken as is. Records that end up empty are skipped.
+ void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
+     const bool isBase64 = (fmt == DSF_BASE64_LF);
+     TStringBuf remaining(blob.AsCharPtr(), blob.Size());
+     TString decoded;
+     for (TStringBuf line; remaining.ReadLine(line);) {
+         if (isBase64) {
+             Base64Decode(line, decoded);
+             // line now views the decode buffer; it is copied below before the next decode.
+             line = decoded;
+         }
+         if (!line.empty()) {
+             result.emplace_back(line.data(), line.size());
+         }
+     }
+ }
+
+ // Loads the input data: from the named file when dataFile is non-empty and
+ // is not "-", otherwise from Cin.
+ TBlob GetInputBlob(const TString& dataFile) {
+     const bool readFromFile = dataFile && dataFile != "-";
+     if (readFromFile) {
+         return TBlob::FromFile(dataFile);
+     }
+     return TBlob::FromStream(Cin);
+ }
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h
new file mode 100644
index 00000000000..9d3dcbda934
--- /dev/null
+++ b/library/cpp/codecs/static/tools/common/ct_common.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/memory/blob.h>
+#include <cmath>
+
+namespace NCodecs {
+ class TStaticCodecInfo;
+ class ICodec;
+
+ // Accumulated measurements of a codec run over a set of records, plus
+ // helpers that derive per-record and relative figures from them.
+ struct TComprStats {
+     double EncSeconds = 0; // total encode time, in seconds
+     double DecSeconds = 0; // total decode time, in seconds
+     size_t Records = 0;    // number of records processed
+     size_t RawSize = 0;    // total size of the original records
+     size_t EncSize = 0;    // total size of the encoded records
+
+     // Rounds n to decPlaces digits after the decimal point.
+     static double Round(double n, size_t decPlaces = 2) {
+         const double scale = std::pow(10.0, static_cast<double>(decPlaces));
+         return std::round(n * scale) / scale;
+     }
+
+     // Converts a fraction to a percentage rounded to two decimal places.
+     static double AsPercent(double n) {
+         return Round(n * 100);
+     }
+
+     // Converts seconds to microseconds.
+     static double AsMicroSecond(double s) {
+         return s * 1000000;
+     }
+
+     // Returns n divided by the record count, rounded; 0 when there are no records.
+     double PerRecord(double n) const {
+         return Round(Records ? n / static_cast<double>(Records) : 0.0);
+     }
+
+     // Returns the saved fraction (RawSize - EncSize) / RawSize.
+     // Yields 0 when nothing was processed: without the guard an empty run
+     // would evaluate 0.0 / 0 and report NaN, unlike PerRecord which already
+     // treats the empty case as 0.
+     double Compression() const {
+         if (RawSize == 0) {
+             return 0.0;
+         }
+         const double raw = static_cast<double>(RawSize);
+         return (raw - static_cast<double>(EncSize)) / raw;
+     }
+
+     // Average encode time per record, in microseconds.
+     double EncTimePerRecordUS() const {
+         return PerRecord(AsMicroSecond(EncSeconds));
+     }
+
+     // Average decode time per record, in microseconds.
+     double DecTimePerRecordUS() const {
+         return PerRecord(AsMicroSecond(DecSeconds));
+     }
+
+     // Average original size per record.
+     double RawSizePerRecord() const {
+         return PerRecord(static_cast<double>(RawSize));
+     }
+
+     // Average encoded size per record.
+     double EncSizePerRecord() const {
+         return PerRecord(static_cast<double>(EncSize));
+     }
+
+     // Average encoded size per record implied by a previously recorded
+     // compression fraction compr, applied to the current RawSize.
+     double OldEncSizePerRecord(double compr) const {
+         return PerRecord((1 - compr) * static_cast<double>(RawSize));
+     }
+
+     // Renders the statistics as text; defined in ct_common.cpp.
+     TString Format(const TStaticCodecInfo&, bool checkMode) const;
+ };
+
+ // Encodes and then decodes every element of data with the given codec and
+ // returns the accumulated size and timing statistics.
+ TComprStats TestCodec(const ICodec&, const TVector<TString>& data);
+
+ // Input record framing understood by ParseBlob.
+ // NOTE(review): the quoted comments next to the enumerators are presumably the
+ // textual names used by GENERATE_ENUM_SERIALIZATION for this header — confirm
+ // before editing or removing them.
+ enum EDataStreamFormat {
+ DSF_NONE,
+ DSF_PLAIN_LF /* "plain" */,
+ DSF_BASE64_LF /* "base64" */,
+ };
+
+ // Splits the blob into lines and appends the non-empty records to the vector;
+ // lines are base64-decoded first when the format is DSF_BASE64_LF.
+ void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&);
+
+ // Returns the contents of dataFile, or of standard input (Cin) when dataFile
+ // is empty or equals "-".
+ TBlob GetInputBlob(const TString& dataFile);
+
+}
diff --git a/library/cpp/codecs/static/tools/common/ya.make b/library/cpp/codecs/static/tools/common/ya.make
new file mode 100644
index 00000000000..d624222dad0
--- /dev/null
+++ b/library/cpp/codecs/static/tools/common/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+OWNER(velavokr)
+
+SRCS(
+ ct_common.cpp
+)
+
+PEERDIR(
+ library/cpp/codecs
+ library/cpp/codecs/static
+ library/cpp/getopt/small
+ library/cpp/string_utils/base64
+ util/draft
+)
+
+GENERATE_ENUM_SERIALIZATION(ct_common.h)
+
+END()