aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/static/tools/common/ct_common.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/static/tools/common/ct_common.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/static/tools/common/ct_common.cpp')
-rw-r--r--library/cpp/codecs/static/tools/common/ct_common.cpp74
1 files changed, 74 insertions, 0 deletions
diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp
new file mode 100644
index 0000000000..fe77691280
--- /dev/null
+++ b/library/cpp/codecs/static/tools/common/ct_common.cpp
@@ -0,0 +1,74 @@
+#include "ct_common.h"
+
+#include <library/cpp/codecs/codecs.h>
+#include <library/cpp/codecs/static/static_codec_info.pb.h>
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/stream/output.h>
+#include <util/string/builder.h>
+#include <util/system/hp_timer.h>
+
+namespace NCodecs {
+ TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
+ TStringBuilder s;
+ s << "raw size/item: " << RawSizePerRecord() << Endl;
+ s << "enc.size/item: " << EncSizePerRecord() << Endl;
+ if (checkMode) {
+ s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl;
+ }
+ s << "enc time us/item: " << EncTimePerRecordUS() << Endl;
+ s << "dec time us/item: " << DecTimePerRecordUS() << Endl;
+ s << "dict size: " << info.GetStoredCodec().Size() << Endl;
+ s << "compression: " << AsPercent(Compression()) << " %" << Endl;
+ if (checkMode) {
+ s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl;
+ }
+ return s;
+ }
+
+ TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
+ TComprStats stats;
+
+ TBuffer encodeBuffer;
+ TBuffer decodeBuffer;
+ for (const auto& data : input) {
+ encodeBuffer.Clear();
+ decodeBuffer.Clear();
+
+ stats.Records += 1;
+ stats.RawSize += data.size();
+
+ THPTimer timer;
+ c.Encode(data, encodeBuffer);
+ stats.EncSize += encodeBuffer.size();
+ stats.EncSeconds += timer.PassedReset();
+
+ c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer);
+ stats.DecSeconds += timer.PassedReset();
+ Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records);
+ }
+
+ return stats;
+ }
+
+ void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
+ TStringBuf bin(blob.AsCharPtr(), blob.Size());
+ TStringBuf line;
+ TString buffer;
+ while (bin.ReadLine(line)) {
+ if (DSF_BASE64_LF == fmt) {
+ Base64Decode(line, buffer);
+ line = buffer;
+ }
+ if (!line) {
+ continue;
+ }
+ result.emplace_back(line.data(), line.size());
+ }
+ }
+
+ TBlob GetInputBlob(const TString& dataFile) {
+ return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin);
+ }
+
+}