diff options
author | Ruslan Kovalev <ruslan.a.kovalev@gmail.com> | 2022-02-10 16:46:44 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:44 +0300 |
commit | 59e19371de37995fcb36beb16cd6ec030af960bc (patch) | |
tree | fa68e36093ebff8b805462e9e6d331fe9d348214 /library/cpp/codecs/static | |
parent | 89db6fe2fe2c32d2a832ddfeb04e8d078e301084 (diff) | |
download | ydb-59e19371de37995fcb36beb16cd6ec030af960bc.tar.gz |
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/codecs/static')
25 files changed, 687 insertions, 687 deletions
diff --git a/library/cpp/codecs/static/builder.cpp b/library/cpp/codecs/static/builder.cpp index 93e34a3edb..083f0fc6f6 100644 --- a/library/cpp/codecs/static/builder.cpp +++ b/library/cpp/codecs/static/builder.cpp @@ -1,39 +1,39 @@ -#include "builder.h" -#include "common.h" - +#include "builder.h" +#include "common.h" + #include <library/cpp/codecs/static/static_codec_info.pb.h> - + #include <library/cpp/codecs/codecs.h> - -#include <util/generic/yexception.h> -#include <util/string/subst.h> - -namespace NCodecs { + +#include <util/generic/yexception.h> +#include <util/string/subst.h> + +namespace NCodecs { TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo& info) { - TStaticCodecInfo result; - TCodecPtr codec = ICodec::GetInstance(info.CodecName); - Y_ENSURE_EX(codec, TCodecException() << "empty codec is not allowed"); - - codec->LearnX(trainingData.begin(), trainingData.end(), info.SampleSizeMultiplier); - { - TStringOutput sout{*result.MutableStoredCodec()}; - ICodec::Store(&sout, codec); - } - - auto& debugInfo = *result.MutableDebugInfo(); - debugInfo.SetStoredCodecHash(DataSignature(result.GetStoredCodec())); - debugInfo.SetCodecName(info.CodecName); - debugInfo.SetSampleSizeMultiplier(info.SampleSizeMultiplier); - debugInfo.SetTimestamp(info.Timestamp); - debugInfo.SetRevisionInfo(info.RevisionInfo); - debugInfo.SetTrainingSetComment(info.TrainingSetComment); - debugInfo.SetTrainingSetResId(info.TrainingSetResId); - return result; - } - + TStaticCodecInfo result; + TCodecPtr codec = ICodec::GetInstance(info.CodecName); + Y_ENSURE_EX(codec, TCodecException() << "empty codec is not allowed"); + + codec->LearnX(trainingData.begin(), trainingData.end(), info.SampleSizeMultiplier); + { + TStringOutput sout{*result.MutableStoredCodec()}; + ICodec::Store(&sout, codec); + } + + auto& debugInfo = *result.MutableDebugInfo(); + debugInfo.SetStoredCodecHash(DataSignature(result.GetStoredCodec())); + debugInfo.SetCodecName(info.CodecName); + debugInfo.SetSampleSizeMultiplier(info.SampleSizeMultiplier); + debugInfo.SetTimestamp(info.Timestamp); + debugInfo.SetRevisionInfo(info.RevisionInfo); + debugInfo.SetTrainingSetComment(info.TrainingSetComment); + debugInfo.SetTrainingSetResId(info.TrainingSetResId); + return result; + } + TString GetStandardFileName(const TStaticCodecInfo& info) { TString cName = info.GetDebugInfo().GetCodecName(); - SubstGlobal(cName, ':', '.'); - return TStringBuilder() << cName << "." << info.GetDebugInfo().GetTimestamp() << ".codec_info"; - } -} + SubstGlobal(cName, ':', '.'); + return TStringBuilder() << cName << "." << info.GetDebugInfo().GetTimestamp() << ".codec_info"; + } +} diff --git a/library/cpp/codecs/static/builder.h b/library/cpp/codecs/static/builder.h index d7533be4d5..234ad42dff 100644 --- a/library/cpp/codecs/static/builder.h +++ b/library/cpp/codecs/static/builder.h @@ -1,29 +1,29 @@ -#pragma once - -#include "static.h" - +#pragma once + +#include "static.h" + #include <library/cpp/svnversion/svnversion.h> - -#include <util/datetime/base.h> + +#include <util/datetime/base.h> #include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/string/builder.h> - -namespace NCodecs { - struct TCodecBuildInfo { - // optimal values from SEARCH-1655 +#include <util/generic/vector.h> +#include <util/string/builder.h> + +namespace NCodecs { + struct TCodecBuildInfo { + // optimal values from SEARCH-1655 TString CodecName = "solar-8k-a:zstd08d-1"; - float SampleSizeMultiplier = 1; - - // debug info: - time_t Timestamp = TInstant::Now().TimeT(); + float SampleSizeMultiplier = 1; + + // debug info: + time_t Timestamp = TInstant::Now().TimeT(); TString RevisionInfo = (TStringBuilder() << "r" << ToString(GetProgramSvnRevision())); TString TrainingSetComment; // a human comment on the training data TString TrainingSetResId; // sandbox resid of the training set - }; - + }; + TStaticCodecInfo BuildStaticCodec(const TVector<TString>& trainingData, const TCodecBuildInfo&); - + TString GetStandardFileName(const TStaticCodecInfo&); - -} + +} diff --git a/library/cpp/codecs/static/common.h b/library/cpp/codecs/static/common.h index 211de2a27d..84b0349d82 100644 --- a/library/cpp/codecs/static/common.h +++ b/library/cpp/codecs/static/common.h @@ -1,32 +1,32 @@ -#pragma once - -#include <util/string/hex.h> -#include <util/digest/city.h> -#include <util/system/byteorder.h> - -namespace NCodecs { - template <class T> - ui64 DataSignature(const T& t) { - static_assert(!std::is_scalar<T>::value, "no scalars"); +#pragma once + +#include <util/string/hex.h> +#include <util/digest/city.h> +#include <util/system/byteorder.h> + +namespace NCodecs { + template <class T> + ui64 DataSignature(const T& t) { + static_assert(!std::is_scalar<T>::value, "no scalars"); return CityHash64(t.data(), t.size()); - } - - template <class T> + } + + template <class T> TString HexWriteScalar(T t) { - static_assert(std::is_scalar<T>::value, "scalars only"); - t = LittleToBig(t); + static_assert(std::is_scalar<T>::value, "scalars only"); + t = LittleToBig(t); TString res = HexEncode(&t, sizeof(t)); - res.to_lower(); - return res; - } - - template <class T> - T HexReadScalar(TStringBuf s) { - static_assert(std::is_scalar<T>::value, "scalars only"); - T t = 0; + res.to_lower(); + return res; + } + + template <class T> + T HexReadScalar(TStringBuf s) { + static_assert(std::is_scalar<T>::value, "scalars only"); + T t = 0; HexDecode(s.data(), Min(s.size(), sizeof(T)), &t); - t = BigToLittle(t); - return t; - } - -} + t = BigToLittle(t); + return t; + } + +} diff --git a/library/cpp/codecs/static/example/example.cpp b/library/cpp/codecs/static/example/example.cpp index 5b750b717e..0c50a1a5be 100644 --- a/library/cpp/codecs/static/example/example.cpp +++ b/library/cpp/codecs/static/example/example.cpp @@ -1,43 +1,43 @@ -#include "example.h" - +#include "example.h" + #include <library/cpp/codecs/static/static.h> - -#include <util/generic/yexception.h> - -extern "C" { + +#include <util/generic/yexception.h> + +extern "C" { extern const ui8 codec_info_huff_20160707[]; extern const ui32 codec_info_huff_20160707Size; extern const ui8 codec_info_sa_huff_20160707[]; extern const ui32 codec_info_sa_huff_20160707Size; -}; - -namespace NStaticCodecExample { - static const NCodecs::TCodecConstPtr CODECS[] = { - nullptr, - NCodecs::RestoreCodecFromArchive(codec_info_huff_20160707, codec_info_huff_20160707Size), - NCodecs::RestoreCodecFromArchive(codec_info_sa_huff_20160707, codec_info_sa_huff_20160707Size), - }; - - static_assert(Y_ARRAY_SIZE(CODECS) == DV_COUNT, "bad array size"); - - void Encode(TBuffer& out, TStringBuf in, EDictVersion dv) { - Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv); - out.Clear(); - if (!in) { - return; - } - CODECS[dv]->Encode(in, out); - out.Append((char)dv); - } - - void Decode(TBuffer& out, TStringBuf in) { - out.Clear(); - if (!in) { - return; - } - EDictVersion dv = (EDictVersion)in.back(); - Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv); - in.Chop(1); - CODECS[dv]->Decode(in, out); - } -} +}; + +namespace NStaticCodecExample { + static const NCodecs::TCodecConstPtr CODECS[] = { + nullptr, + NCodecs::RestoreCodecFromArchive(codec_info_huff_20160707, codec_info_huff_20160707Size), + NCodecs::RestoreCodecFromArchive(codec_info_sa_huff_20160707, codec_info_sa_huff_20160707Size), + }; + + static_assert(Y_ARRAY_SIZE(CODECS) == DV_COUNT, "bad array size"); + + void Encode(TBuffer& out, TStringBuf in, EDictVersion dv) { + Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv); + out.Clear(); + if (!in) { + return; + } + CODECS[dv]->Encode(in, out); + out.Append((char)dv); + } + + void Decode(TBuffer& out, TStringBuf in) { + out.Clear(); + if (!in) { + return; + } + EDictVersion dv = (EDictVersion)in.back(); + Y_ENSURE(dv > DV_NULL && dv < DV_COUNT, "invalid dict version: " << (int)dv); + in.Chop(1); + CODECS[dv]->Decode(in, out); + } +} diff --git a/library/cpp/codecs/static/example/example.h b/library/cpp/codecs/static/example/example.h index f9b3a7324b..070ca90f02 100644 --- a/library/cpp/codecs/static/example/example.h +++ b/library/cpp/codecs/static/example/example.h @@ -1,17 +1,17 @@ -#pragma once - -#include <util/generic/strbuf.h> -#include <util/generic/buffer.h> - -namespace NStaticCodecExample { +#pragma once + +#include <util/generic/strbuf.h> +#include <util/generic/buffer.h> + +namespace NStaticCodecExample { enum EDictVersion : ui8 { DV_NULL = 0, DV_HUFF_20160707, DV_SA_HUFF_20160707, DV_COUNT - }; - - void Encode(TBuffer&, TStringBuf, EDictVersion dv = DV_SA_HUFF_20160707); - - void Decode(TBuffer&, TStringBuf); -} + }; + + void Encode(TBuffer&, TStringBuf, EDictVersion dv = DV_SA_HUFF_20160707); + + void Decode(TBuffer&, TStringBuf); +} diff --git a/library/cpp/codecs/static/example/ya.make b/library/cpp/codecs/static/example/ya.make index ca6c5fd900..85dc222624 100644 --- a/library/cpp/codecs/static/example/ya.make +++ b/library/cpp/codecs/static/example/ya.make @@ -1,24 +1,24 @@ -LIBRARY() - -OWNER(velavokr) - -SRCS( - GLOBAL example.cpp -) - -PEERDIR( +LIBRARY() + +OWNER(velavokr) + +SRCS( + GLOBAL example.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/codecs/static -) - -ARCHIVE_ASM( +) + +ARCHIVE_ASM( "solar-8k-a.huffman.1467494385.codec_info" NAME codec_info_sa_huff_20160707 -) - -ARCHIVE_ASM( +) + +ARCHIVE_ASM( "huffman.1467494385.codec_info" NAME codec_info_huff_20160707 -) - -END() +) + +END() diff --git a/library/cpp/codecs/static/static.cpp b/library/cpp/codecs/static/static.cpp index 44a07dd73a..d2c99a15ee 100644 --- a/library/cpp/codecs/static/static.cpp +++ b/library/cpp/codecs/static/static.cpp @@ -1,98 +1,98 @@ -#include "static.h" -#include "common.h" - +#include "static.h" +#include "common.h" + #include <library/cpp/codecs/static/static_codec_info.pb.h> #include <library/cpp/archive/yarchive.h> - -#include <util/draft/datetime.h> - -#include <util/string/builder.h> -#include <util/stream/buffer.h> -#include <util/stream/mem.h> -#include <util/string/hex.h> -#include <util/ysaveload.h> - -namespace NCodecs { + +#include <util/draft/datetime.h> + +#include <util/string/builder.h> +#include <util/stream/buffer.h> +#include <util/stream/mem.h> +#include <util/string/hex.h> +#include <util/ysaveload.h> + +namespace NCodecs { static constexpr TStringBuf STATIC_CODEC_INFO_MAGIC = "CodecInf"; - - static TStringBuf GetStaticCodecInfoMagic() { + + static TStringBuf GetStaticCodecInfoMagic() { return STATIC_CODEC_INFO_MAGIC; - } - + } + void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo& info) { - TBufferOutput bout; + TBufferOutput bout; info.SerializeToArcadiaStream(&bout); - ui64 hash = DataSignature(bout.Buffer()); - out.Write(GetStaticCodecInfoMagic()); - ::Save(&out, hash); - ::Save(&out, bout.Buffer()); - } - + ui64 hash = DataSignature(bout.Buffer()); + out.Write(GetStaticCodecInfoMagic()); + ::Save(&out, hash); + ::Save(&out, bout.Buffer()); + } + TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in) { - { - TBuffer magic; + { + TBuffer magic; magic.Resize(GetStaticCodecInfoMagic().size()); Y_ENSURE_EX(in.Read(magic.Data(), GetStaticCodecInfoMagic().size()) == GetStaticCodecInfoMagic().size(), - TCodecException() << "bad codec info"); + TCodecException() << "bad codec info"); Y_ENSURE_EX(TStringBuf(magic.data(), magic.size()) == GetStaticCodecInfoMagic(), - TCodecException() << "bad codec info"); - } - - ui64 hash; - ::Load(&in, hash); - TBuffer info; - ::Load(&in, info); - Y_ENSURE_EX(hash == DataSignature(info), TCodecException() << "bad codec info"); - - TStaticCodecInfo result; + TCodecException() << "bad codec info"); + } + + ui64 hash; + ::Load(&in, hash); + TBuffer info; + ::Load(&in, info); + Y_ENSURE_EX(hash == DataSignature(info), TCodecException() << "bad codec info"); + + TStaticCodecInfo result; Y_ENSURE_EX(result.ParseFromArray(info.data(), info.size()), TCodecException() << "bad codec info"); - - return result; - } - + + return result; + } + TString SaveCodecInfoToString(const TStaticCodecInfo& info) { - TStringStream s; - SaveCodecInfoToStream(s, info); - return s.Str(); - } - - TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data) { + TStringStream s; + SaveCodecInfoToStream(s, info); + return s.Str(); + } + + TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data) { TMemoryInput m{data.data(), data.size()}; - return LoadCodecInfoFromStream(m); - } - + return LoadCodecInfoFromStream(m); + } + TString FormatCodecInfo(const TStaticCodecInfo& ci) { - TStringBuilder s; - s << "codec name: " << ci.GetDebugInfo().GetCodecName() << Endl; - s << "codec hash: " << HexWriteScalar(ci.GetDebugInfo().GetStoredCodecHash()) << Endl; - s << "dict size: " << ci.GetStoredCodec().Size() << Endl; - s << "sample mult: " << ci.GetDebugInfo().GetSampleSizeMultiplier() << Endl; - s << "orig.compress: " << ci.GetDebugInfo().GetCompression() * 100 << " %" << Endl; - s << "timestamp: " << ci.GetDebugInfo().GetTimestamp() << " (" + TStringBuilder s; + s << "codec name: " << ci.GetDebugInfo().GetCodecName() << Endl; + s << "codec hash: " << HexWriteScalar(ci.GetDebugInfo().GetStoredCodecHash()) << Endl; + s << "dict size: " << ci.GetStoredCodec().Size() << Endl; + s << "sample mult: " << ci.GetDebugInfo().GetSampleSizeMultiplier() << Endl; + s << "orig.compress: " << ci.GetDebugInfo().GetCompression() * 100 << " %" << Endl; + s << "timestamp: " << ci.GetDebugInfo().GetTimestamp() << " (" << NDatetime::TSimpleTM::NewLocal(ci.GetDebugInfo().GetTimestamp()).ToString() << ")" << Endl; - s << "revision: " << ci.GetDebugInfo().GetRevisionInfo() << Endl; - s << "training set comment: " << ci.GetDebugInfo().GetTrainingSetComment() << Endl; - s << "training set resId: " << ci.GetDebugInfo().GetTrainingSetResId() << Endl; - return s; - } - + s << "revision: " << ci.GetDebugInfo().GetRevisionInfo() << Endl; + s << "training set comment: " << ci.GetDebugInfo().GetTrainingSetComment() << Endl; + s << "training set resId: " << ci.GetDebugInfo().GetTrainingSetResId() << Endl; + return s; + } + TString LoadStringFromArchive(const ui8* begin, size_t size) { - TArchiveReader ar(TBlob::NoCopy(begin, size)); - Y_VERIFY(ar.Count() == 1, "invalid number of entries"); - auto blob = ar.ObjectBlobByKey(ar.KeyByIndex(0)); + TArchiveReader ar(TBlob::NoCopy(begin, size)); + Y_VERIFY(ar.Count() == 1, "invalid number of entries"); + auto blob = ar.ObjectBlobByKey(ar.KeyByIndex(0)); return TString{blob.AsCharPtr(), blob.Size()}; - } - - TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo& info) { - return NCodecs::ICodec::RestoreFromString(info.GetStoredCodec()); - } - - TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size) { - const auto& data = LoadStringFromArchive(begin, size); - const auto& info = LoadCodecInfoFromString(data); - const auto& codec = RestoreCodecFromCodecInfo(info); - Y_ENSURE_EX(codec, TCodecException() << "null codec"); - return codec; - } -} + } + + TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo& info) { + return NCodecs::ICodec::RestoreFromString(info.GetStoredCodec()); + } + + TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size) { + const auto& data = LoadStringFromArchive(begin, size); + const auto& info = LoadCodecInfoFromString(data); + const auto& codec = RestoreCodecFromCodecInfo(info); + Y_ENSURE_EX(codec, TCodecException() << "null codec"); + return codec; + } +} diff --git a/library/cpp/codecs/static/static.h b/library/cpp/codecs/static/static.h index c1eaed2a74..efa9c60c22 100644 --- a/library/cpp/codecs/static/static.h +++ b/library/cpp/codecs/static/static.h @@ -1,34 +1,34 @@ -#pragma once - +#pragma once + #include <library/cpp/codecs/codecs.h> - -#include <util/generic/strbuf.h> + +#include <util/generic/strbuf.h> #include <util/generic/string.h> #include <util/stream/output.h> - -namespace NCodecs { - class TStaticCodecInfo; - - // load - - TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo&); - - TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data); - + +namespace NCodecs { + class TStaticCodecInfo; + + // load + + TCodecConstPtr RestoreCodecFromCodecInfo(const TStaticCodecInfo&); + + TStaticCodecInfo LoadCodecInfoFromString(TStringBuf data); + TString LoadStringFromArchive(const ui8* begin, size_t size); - - TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size); - - // save - + + TCodecConstPtr RestoreCodecFromArchive(const ui8* begin, size_t size); + + // save + TString SaveCodecInfoToString(const TStaticCodecInfo&); - + void SaveCodecInfoToStream(IOutputStream& out, const TStaticCodecInfo&); - - // misc - + + // misc + TStaticCodecInfo LoadCodecInfoFromStream(IInputStream& in); - + TString FormatCodecInfo(const TStaticCodecInfo&); - -} + +} diff --git a/library/cpp/codecs/static/static_codec_info.proto b/library/cpp/codecs/static/static_codec_info.proto index 362abb4dad..178459784b 100644 --- a/library/cpp/codecs/static/static_codec_info.proto +++ b/library/cpp/codecs/static/static_codec_info.proto @@ -1,17 +1,17 @@ -package NCodecs; - -message TStaticCodecInfo { - message TDebugInfo { - optional string CodecName = 1; // the exact codec variant name - optional uint64 Timestamp = 2; // when the codec was built - optional string RevisionInfo = 3; // the arcadia revision info - optional float SampleSizeMultiplier = 4; // how the default sample size was modified to improve compression - optional float Compression = 5; // the compression on the training set ((raw_size - coded_size) / raw_size) - optional string TrainingSetComment = 6; // a human readable description of the training set - optional string TrainingSetResId = 7; // the training set sandbox resource id - optional uint64 StoredCodecHash = 8; // cityhash64(data) - } - - optional bytes StoredCodec = 1; // the data of the codec - optional TDebugInfo DebugInfo = 2; // misc debug info which could be useful in finding whereabouts later -} +package NCodecs; + +message TStaticCodecInfo { + message TDebugInfo { + optional string CodecName = 1; // the exact codec variant name + optional uint64 Timestamp = 2; // when the codec was built + optional string RevisionInfo = 3; // the arcadia revision info + optional float SampleSizeMultiplier = 4; // how the default sample size was modified to improve compression + optional float Compression = 5; // the compression on the training set ((raw_size - coded_size) / raw_size) + optional string TrainingSetComment = 6; // a human readable description of the training set + optional string TrainingSetResId = 7; // the training set sandbox resource id + optional uint64 StoredCodecHash = 8; // cityhash64(data) + } + + optional bytes StoredCodec = 1; // the data of the codec + optional TDebugInfo DebugInfo = 2; // misc debug info which could be useful in finding whereabouts later +} diff --git a/library/cpp/codecs/static/tools/common/ct_common.cpp b/library/cpp/codecs/static/tools/common/ct_common.cpp index fe77691280..cea40506e1 100644 --- a/library/cpp/codecs/static/tools/common/ct_common.cpp +++ b/library/cpp/codecs/static/tools/common/ct_common.cpp @@ -1,74 +1,74 @@ -#include "ct_common.h" - +#include "ct_common.h" + #include <library/cpp/codecs/codecs.h> #include <library/cpp/codecs/static/static_codec_info.pb.h> #include <library/cpp/string_utils/base64/base64.h> - + #include <util/stream/output.h> -#include <util/string/builder.h> -#include <util/system/hp_timer.h> - -namespace NCodecs { +#include <util/string/builder.h> +#include <util/system/hp_timer.h> + +namespace NCodecs { TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const { - TStringBuilder s; - s << "raw size/item: " << RawSizePerRecord() << Endl; - s << "enc.size/item: " << EncSizePerRecord() << Endl; - if (checkMode) { - s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl; - } - s << "enc time us/item: " << EncTimePerRecordUS() << Endl; - s << "dec time us/item: " << DecTimePerRecordUS() << Endl; - s << "dict size: " << info.GetStoredCodec().Size() << Endl; - s << "compression: " << AsPercent(Compression()) << " %" << Endl; - if (checkMode) { - s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl; - } - return s; - } - + TStringBuilder s; + s << "raw size/item: " << RawSizePerRecord() << Endl; + s << "enc.size/item: " << EncSizePerRecord() << Endl; + if (checkMode) { + s << "orig.enc.size/item: " << OldEncSizePerRecord(info.GetDebugInfo().GetCompression()) << Endl; + } + s << "enc time us/item: " << EncTimePerRecordUS() << Endl; + s << "dec time us/item: " << DecTimePerRecordUS() << Endl; + s << "dict size: " << info.GetStoredCodec().Size() << Endl; + s << "compression: " << AsPercent(Compression()) << " %" << Endl; + if (checkMode) { + s << "orig.compression: " << AsPercent(info.GetDebugInfo().GetCompression()) << " %" << Endl; + } + return s; + } + TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) { - TComprStats stats; - - TBuffer encodeBuffer; - TBuffer decodeBuffer; - for (const auto& data : input) { - encodeBuffer.Clear(); - decodeBuffer.Clear(); - - stats.Records += 1; + TComprStats stats; + + TBuffer encodeBuffer; + TBuffer decodeBuffer; + for (const auto& data : input) { + encodeBuffer.Clear(); + decodeBuffer.Clear(); + + stats.Records += 1; stats.RawSize += data.size(); - - THPTimer timer; - c.Encode(data, encodeBuffer); + + THPTimer timer; + c.Encode(data, encodeBuffer); stats.EncSize += encodeBuffer.size(); - stats.EncSeconds += timer.PassedReset(); - + stats.EncSeconds += timer.PassedReset(); + c.Decode(TStringBuf{encodeBuffer.data(), encodeBuffer.size()}, decodeBuffer); - stats.DecSeconds += timer.PassedReset(); + stats.DecSeconds += timer.PassedReset(); Y_ENSURE(data == TStringBuf(decodeBuffer.data(), decodeBuffer.size()), "invalid encoding at record " << stats.Records); - } - - return stats; - } - + } + + return stats; + } + void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) { TStringBuf bin(blob.AsCharPtr(), blob.Size()); - TStringBuf line; + TStringBuf line; TString buffer; - while (bin.ReadLine(line)) { - if (DSF_BASE64_LF == fmt) { - Base64Decode(line, buffer); - line = buffer; - } - if (!line) { - continue; - } + while (bin.ReadLine(line)) { + if (DSF_BASE64_LF == fmt) { + Base64Decode(line, buffer); + line = buffer; + } + if (!line) { + continue; + } result.emplace_back(line.data(), line.size()); - } - } - + } + } + TBlob GetInputBlob(const TString& dataFile) { - return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin); - } - -} + return dataFile && dataFile != "-" ? TBlob::FromFile(dataFile) : TBlob::FromStream(Cin); + } + +} diff --git a/library/cpp/codecs/static/tools/common/ct_common.h b/library/cpp/codecs/static/tools/common/ct_common.h index 9d3dcbda93..de531b27e6 100644 --- a/library/cpp/codecs/static/tools/common/ct_common.h +++ b/library/cpp/codecs/static/tools/common/ct_common.h @@ -1,75 +1,75 @@ -#pragma once - +#pragma once + #include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/memory/blob.h> -#include <cmath> - -namespace NCodecs { - class TStaticCodecInfo; - class ICodec; - - struct TComprStats { - double EncSeconds = 0; - double DecSeconds = 0; - size_t Records = 0; - size_t RawSize = 0; - size_t EncSize = 0; - - static double Round(double n, size_t decPlaces = 2) { - double p = pow(10, decPlaces); - return round(n * p) / p; - } - - static double AsPercent(double n) { - return Round(n * 100); - } - - static double AsMicroSecond(double s) { - return s * 1000000; - } - - double PerRecord(double n) const { - return Round((double)(Records ? n / Records : 0)); - } - - double Compression() const { - return ((double)RawSize - (double)EncSize) / RawSize; - } - - double EncTimePerRecordUS() const { - return PerRecord(AsMicroSecond(EncSeconds)); - } - - double DecTimePerRecordUS() const { - return PerRecord(AsMicroSecond(DecSeconds)); - } - - double RawSizePerRecord() const { - return PerRecord(RawSize); - } - - double EncSizePerRecord() const { - return PerRecord(EncSize); - } - - double OldEncSizePerRecord(double compr) const { - return PerRecord((1 - compr) * RawSize); - } - +#include <util/generic/vector.h> +#include <util/memory/blob.h> +#include <cmath> + +namespace NCodecs { + class TStaticCodecInfo; + class ICodec; + + struct TComprStats { + double EncSeconds = 0; + double DecSeconds = 0; + size_t Records = 0; + size_t RawSize = 0; + size_t EncSize = 0; + + static double Round(double n, size_t decPlaces = 2) { + double p = pow(10, decPlaces); + return round(n * p) / p; + } + + static double AsPercent(double n) { + return Round(n * 100); + } + + static double AsMicroSecond(double s) { + return s * 1000000; + } + + double PerRecord(double n) const { + return Round((double)(Records ? n / Records : 0)); + } + + double Compression() const { + return ((double)RawSize - (double)EncSize) / RawSize; + } + + double EncTimePerRecordUS() const { + return PerRecord(AsMicroSecond(EncSeconds)); + } + + double DecTimePerRecordUS() const { + return PerRecord(AsMicroSecond(DecSeconds)); + } + + double RawSizePerRecord() const { + return PerRecord(RawSize); + } + + double EncSizePerRecord() const { + return PerRecord(EncSize); + } + + double OldEncSizePerRecord(double compr) const { + return PerRecord((1 - compr) * RawSize); + } + TString Format(const TStaticCodecInfo&, bool checkMode) const; - }; - + }; + TComprStats TestCodec(const ICodec&, const TVector<TString>& data); - - enum EDataStreamFormat { - DSF_NONE, - DSF_PLAIN_LF /* "plain" */, - DSF_BASE64_LF /* "base64" */, - }; - + + enum EDataStreamFormat { + DSF_NONE, + DSF_PLAIN_LF /* "plain" */, + DSF_BASE64_LF /* "base64" */, + }; + void ParseBlob(TVector<TString>&, EDataStreamFormat, const TBlob&); - + TBlob GetInputBlob(const TString& dataFile); - -} + +} diff --git a/library/cpp/codecs/static/tools/common/ya.make b/library/cpp/codecs/static/tools/common/ya.make index d624222dad..5f575a2f28 100644 --- a/library/cpp/codecs/static/tools/common/ya.make +++ b/library/cpp/codecs/static/tools/common/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY() + OWNER(velavokr) - -SRCS( - ct_common.cpp -) - -PEERDIR( + +SRCS( + ct_common.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/codecs/static library/cpp/getopt/small library/cpp/string_utils/base64 - util/draft -) - + util/draft +) + GENERATE_ENUM_SERIALIZATION(ct_common.h) - -END() + +END() diff --git a/library/cpp/codecs/static/tools/static_codec_checker/README b/library/cpp/codecs/static/tools/static_codec_checker/README index 723a68300b..c66703227d 100644 --- a/library/cpp/codecs/static/tools/static_codec_checker/README +++ b/library/cpp/codecs/static/tools/static_codec_checker/README @@ -1,4 +1,4 @@ This is a viewer for generated codec and utility for verification of the compression quality on a new data. - + Usage: -static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt +static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt diff --git a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp index 9c8d568d82..5ae901d8f8 100644 --- a/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp +++ b/library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker.cpp @@ -3,25 +3,25 @@ #include <library/cpp/codecs/static/static_codec_info.pb.h> #include <library/cpp/codecs/codecs.h> #include <library/cpp/getopt/small/last_getopt.h> - -#include <util/digest/city.h> -#include <util/generic/yexception.h> -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <util/stream/format.h> -#include <util/string/builder.h> - -int main(int argc, char** argv) { - NCodecs::TCodecPtr codecPtr; - NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; + +#include <util/digest/city.h> +#include <util/generic/yexception.h> +#include <util/stream/file.h> +#include <util/stream/buffer.h> +#include <util/stream/format.h> +#include <util/string/builder.h> + +int main(int argc, char** argv) { + NCodecs::TCodecPtr codecPtr; + NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; TString codecFile; - bool testCompression = false; - - auto opts = NLastGetopt::TOpts::Default(); - opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator."); - opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt"); - NCodecs::TStaticCodecInfo codec; - + bool testCompression = false; + + auto opts = NLastGetopt::TOpts::Default(); + opts.SetTitle("Prints a .codec_info file and optionally checks its performance on new data. See also static_codec_generator."); + opts.SetCmdLineDescr("-c 9089f3e9b7a0f0d4.codec_info -t -f base64 qtrees.sample.txt"); + NCodecs::TStaticCodecInfo codec; + opts.AddLongOption('c', "codec-info").RequiredArgument("codec_info").Handler1T<TString>([&codecFile, &codec, &codecPtr](TString name) { codecFile = name; codec.CopyFrom(NCodecs::LoadCodecInfoFromString(TUnbufferedFileInput(name).ReadAll())); @@ -29,45 +29,45 @@ int main(int argc, char** argv) { }) .Required() .Help(".codec_info file with serialized static data for codec"); - + opts.AddLongOption('t', "test").NoArgument().StoreValue(&testCompression, true).Optional().Help("test current performance"); - + opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Optional().Help("test set input file format"); - - opts.SetFreeArgsMin(0); - opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files"); - - NLastGetopt::TOptsParseResult res(&opts, argc, argv); - - Cout << codecFile << Endl; - Cout << NCodecs::FormatCodecInfo(codec) << Endl; - - if (testCompression) { - if (NCodecs::DSF_NONE == fmt) { - Cerr << "Specify format (-f|--format) for testing set input" << Endl; - exit(1); - } - - Cout << "Reading testing set data ... " << Flush; - + + opts.SetFreeArgsMin(0); + opts.SetFreeArgTitle(0, "testing_set_input_file", "testing set input files"); + + NLastGetopt::TOptsParseResult res(&opts, argc, argv); + + Cout << codecFile << Endl; + Cout << NCodecs::FormatCodecInfo(codec) << Endl; + + if (testCompression) { + if (NCodecs::DSF_NONE == fmt) { + Cerr << "Specify format (-f|--format) for testing set input" << Endl; + exit(1); + } + + Cout << "Reading testing set data ... " << Flush; + TVector<TString> allData; - for (const auto& freeArg : res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); - } - - if (!res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); - } - - Cout << "Done" << Endl << Endl; - - Cout << "records: " << allData.size() << Endl; - Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; - - Cout << "Testing compression ... " << Flush; - auto stats = NCodecs::TestCodec(*codecPtr, allData); - Cout << "Done" << Endl << Endl; - - Cout << stats.Format(codec, true) << Endl; - } -} + for (const auto& freeArg : res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); + } + + if (!res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); + } + + Cout << "Done" << Endl << Endl; + + Cout << "records: " << allData.size() << Endl; + Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; + + Cout << "Testing compression ... " << Flush; + auto stats = NCodecs::TestCodec(*codecPtr, allData); + Cout << "Done" << Endl << Endl; + + Cout << stats.Format(codec, true) << Endl; + } +} diff --git a/library/cpp/codecs/static/tools/static_codec_checker/ya.make b/library/cpp/codecs/static/tools/static_codec_checker/ya.make index 90e06ca448..86b73dff6c 100644 --- a/library/cpp/codecs/static/tools/static_codec_checker/ya.make +++ b/library/cpp/codecs/static/tools/static_codec_checker/ya.make @@ -1,16 +1,16 @@ -PROGRAM() - +PROGRAM() + OWNER(velavokr) - -SRCS( - static_codec_checker.cpp -) - -PEERDIR( + +SRCS( + static_codec_checker.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/codecs/static library/cpp/codecs/static/tools/common library/cpp/getopt/small -) - -END() +) + +END() diff --git a/library/cpp/codecs/static/tools/static_codec_generator/README b/library/cpp/codecs/static/tools/static_codec_generator/README index e6bb52b959..f0fffd745a 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/README +++ b/library/cpp/codecs/static/tools/static_codec_generator/README @@ -1,4 +1,4 @@ This is a utility for reproducible teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource. - + Usage: -static_codec_generator -t -m 'the training data description' -f plain samples.txt +static_codec_generator -t -m 'the training data description' -f plain samples.txt diff --git a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp index 45fdb5c5fe..b37a0f686d 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp +++ b/library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator.cpp @@ -2,81 +2,81 @@ #include <library/cpp/codecs/static/static_codec_info.pb.h> #include <library/cpp/codecs/static/builder.h> #include <library/cpp/codecs/codecs.h> - + #include <library/cpp/getopt/small/last_getopt.h> - -#include <util/generic/yexception.h> -#include <util/stream/file.h> -#include <util/string/builder.h> - -int main(int argc, char** argv) { - NCodecs::TCodecBuildInfo info; - NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; - - auto opts = NLastGetopt::TOpts::Default(); - opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt"); - opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin"); - + +#include <util/generic/yexception.h> +#include <util/stream/file.h> +#include <util/string/builder.h> + +int main(int argc, char** argv) { + NCodecs::TCodecBuildInfo info; + NCodecs::EDataStreamFormat fmt = NCodecs::DSF_NONE; + + auto opts = NLastGetopt::TOpts::Default(); + opts.SetCmdLineDescr("-m 'Training set: 100000 qtrees taken from web mmeta logs' -f base64 qtrees.sample.txt"); + opts.SetTitle("Teaches the codec and serializes it as a file named CODECNAME.hash(CODECDATA).bin"); + opts.AddLongOption('m', "message").RequiredArgument("training_set_comment").StoreResult(&info.TrainingSetComment).Required().Help("a human description for the training set"); - + opts.AddLongOption('r', "resource").RequiredArgument("training_set_res_id").StoreResult(&info.TrainingSetResId).Optional().Help("sandbox resource id for the training set"); - + opts.AddLongOption('c', "codec").RequiredArgument("codec_name").StoreResult(&info.CodecName).Optional().DefaultValue(info.CodecName); - + opts.AddLongOption('s', "sample-multiplier").RequiredArgument("multiplier").StoreResult(&info.SampleSizeMultiplier).Optional().DefaultValue(ToString(info.SampleSizeMultiplier)).Help("multiplier for default sample size"); - + opts.AddLongOption('f', "format").RequiredArgument(TStringBuilder() << "(" << NCodecs::DSF_PLAIN_LF << "|" << NCodecs::DSF_BASE64_LF << ")").StoreResult(&fmt).Required().Help("training set input file format"); - + opts.AddLongOption("list-codecs").NoArgument().Handler0([]() { Cout << JoinStrings(NCodecs::ICodec::GetCodecsList(), "\n") << Endl; exit(0); }) .Optional() .Help("list available codecs"); - + opts.AddLongOption("fake-revision").RequiredArgument("revision").StoreResult(&info.RevisionInfo).Optional().Hidden(); // replace static_codec_generator revision in debug info - + opts.AddLongOption("fake-timestamp").RequiredArgument("timestamp").StoreResult(&info.Timestamp).Optional().Hidden(); // replace generating timestamp in debug info - - opts.SetFreeArgsMin(0); - opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files"); - - NLastGetopt::TOptsParseResult res(&opts, argc, argv); - - Cout << "Reading training set data ... " << Flush; + + opts.SetFreeArgsMin(0); + opts.SetFreeArgTitle(0, "training_set_input_file", "training set input files"); + + NLastGetopt::TOptsParseResult res(&opts, argc, argv); + + Cout << "Reading training set data ... " << Flush; TVector<TString> allData; - for (const auto& freeArg : res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); - } - - if (!res.GetFreeArgs()) { - NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); - } - Cout << "Done" << Endl << Endl; - - Cout << "records: " << allData.size() << Endl; - Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; - - Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush; - auto codec = NCodecs::BuildStaticCodec(allData, info); - Cout << "Done" << Endl; - + for (const auto& freeArg : res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob(freeArg)); + } + + if (!res.GetFreeArgs()) { + NCodecs::ParseBlob(allData, fmt, NCodecs::GetInputBlob("-")); + } + Cout << "Done" << Endl << Endl; + + Cout << "records: " << allData.size() << Endl; + Cout << "raw size: " << NCodecs::GetInputSize(allData.begin(), allData.end()) << " bytes" << Endl << Endl; + + Cout << "Training " << info.CodecName << " , sample size multiplier is " << info.SampleSizeMultiplier << " ... " << Flush; + auto codec = NCodecs::BuildStaticCodec(allData, info); + Cout << "Done" << Endl; + TString codecName = NCodecs::GetStandardFileName(codec); - NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); - - Cout << "Testing compression ... " << Flush; - auto stats = NCodecs::TestCodec(*codecPtr, allData); - Cout << "Done" << Endl << Endl; - - codec.MutableDebugInfo()->SetCompression(stats.Compression()); - - Cout << stats.Format(codec, false) << Endl; - - Cout << "Saving as " << codecName << " ... " << Flush; - { + NCodecs::TCodecPtr codecPtr = NCodecs::ICodec::RestoreFromString(codec.GetStoredCodec()); + + Cout << "Testing compression ... " << Flush; + auto stats = NCodecs::TestCodec(*codecPtr, allData); + Cout << "Done" << Endl << Endl; + + codec.MutableDebugInfo()->SetCompression(stats.Compression()); + + Cout << stats.Format(codec, false) << Endl; + + Cout << "Saving as " << codecName << " ... " << Flush; + { TUnbufferedFileOutput fout{codecName}; - NCodecs::SaveCodecInfoToStream(fout, codec); - fout.Finish(); - } - Cout << "Done" << Endl << Endl; -} + NCodecs::SaveCodecInfoToStream(fout, codec); + fout.Finish(); + } + Cout << "Done" << Endl << Endl; +} diff --git a/library/cpp/codecs/static/tools/static_codec_generator/ya.make b/library/cpp/codecs/static/tools/static_codec_generator/ya.make index efbc440dd1..21750dde49 100644 --- a/library/cpp/codecs/static/tools/static_codec_generator/ya.make +++ b/library/cpp/codecs/static/tools/static_codec_generator/ya.make @@ -1,17 +1,17 @@ -PROGRAM() - +PROGRAM() + OWNER(velavokr) - -SRCS( - static_codec_generator.cpp -) - -PEERDIR( + +SRCS( + static_codec_generator.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/codecs/static library/cpp/codecs/static/tools/common library/cpp/digest/md5 library/cpp/getopt/small -) - -END() +) + +END() diff --git a/library/cpp/codecs/static/tools/tests/static_codec_tools.py b/library/cpp/codecs/static/tools/tests/static_codec_tools.py index db4140e370..a5baa262f7 100644 --- a/library/cpp/codecs/static/tools/tests/static_codec_tools.py +++ b/library/cpp/codecs/static/tools/tests/static_codec_tools.py @@ -1,18 +1,18 @@ -#!/usr/bin/env python - -import yatest.common as tt -import os.path as op - -def test_static_codec_tools(): +#!/usr/bin/env python + +import yatest.common as tt +import os.path as op + +def test_static_codec_tools(): tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_generator/static_codec_generator")] - + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1", - "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"], - timeout=60) - assert(op.exists("solar-8k-a.huffman.1467494385.codec_info")) + + ["-m", "test codec", "-r", "sbr://143310406", "-f", "plain", "-c", "solar-8k-a:huffman", "-s", "1", + "--fake-revision", "r2385905", "--fake-timestamp", "1467494385", "sample.txt"], + timeout=60) + assert(op.exists("solar-8k-a.huffman.1467494385.codec_info")) tt.canonical_execute(tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker"), - args=["-c", "solar-8k-a.huffman.1467494385.codec_info"], - timeout=60) + args=["-c", "solar-8k-a.huffman.1467494385.codec_info"], + timeout=60) tt.execute([tt.binary_path("library/cpp/codecs/static/tools/static_codec_checker/static_codec_checker")] - + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"], - timeout=60) - return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info") + + ["-c", "solar-8k-a.huffman.1467494385.codec_info", "-f", "plain", "-t", "sample.txt"], + timeout=60) + return tt.canonical_file("solar-8k-a.huffman.1467494385.codec_info") diff --git a/library/cpp/codecs/static/tools/tests/ya.make b/library/cpp/codecs/static/tools/tests/ya.make index c5324eaf53..5555d90cae 100644 --- a/library/cpp/codecs/static/tools/tests/ya.make +++ b/library/cpp/codecs/static/tools/tests/ya.make @@ -1,20 +1,20 @@ PY2TEST() - -OWNER(velavokr) - -TEST_SRCS(static_codec_tools.py) - -DATA(sbr://143310406) - -TIMEOUT(4200) - + +OWNER(velavokr) + +TEST_SRCS(static_codec_tools.py) + +DATA(sbr://143310406) + +TIMEOUT(4200) + TAG(ya:not_autocheck) -DEPENDS( +DEPENDS( library/cpp/codecs/static/tools/static_codec_checker library/cpp/codecs/static/tools/static_codec_generator -) - - +) + + -END() +END() diff --git a/library/cpp/codecs/static/tools/ya.make b/library/cpp/codecs/static/tools/ya.make index dd3e8437aa..ab72769153 100644 --- a/library/cpp/codecs/static/tools/ya.make +++ b/library/cpp/codecs/static/tools/ya.make @@ -1,5 +1,5 @@ -RECURSE( - common - static_codec_generator - static_codec_checker -) +RECURSE( + common + static_codec_generator + static_codec_checker +) diff --git a/library/cpp/codecs/static/ut/builder_ut.cpp b/library/cpp/codecs/static/ut/builder_ut.cpp index b47c279ed1..48d5c98d5d 100644 --- a/library/cpp/codecs/static/ut/builder_ut.cpp +++ b/library/cpp/codecs/static/ut/builder_ut.cpp @@ -1,57 +1,57 @@ #include <library/cpp/testing/unittest/registar.h> #include <library/cpp/codecs/static/builder.h> #include <library/cpp/codecs/static/static_codec_info.pb.h> -#include <util/string/vector.h> - +#include <util/string/vector.h> + class TStaticCodecInfoBuilderTest: public NUnitTest::TTestBase { - UNIT_TEST_SUITE(TStaticCodecInfoBuilderTest) + UNIT_TEST_SUITE(TStaticCodecInfoBuilderTest) UNIT_TEST(TestBuild) - UNIT_TEST_SUITE_END(); + UNIT_TEST_SUITE_END(); -private: +private: TVector<TString> PrepareData() { TVector<TString> data; - for (ui32 i = 'a'; i <= 'z'; ++i) { + for (ui32 i = 'a'; i <= 'z'; ++i) { data.push_back(TString(1, (char)i)); - } - return data; - } - - void TestBuild() { + } + return data; + } + + void TestBuild() { TVector<TString> data; - NCodecs::TCodecBuildInfo info; - info.CodecName = "huffman"; - info.SampleSizeMultiplier = 2; - info.Timestamp = 1467494385; - info.RevisionInfo = "r2385905"; - info.TrainingSetComment = "some dummy data"; - info.TrainingSetResId = "sbr://1234"; - auto res = NCodecs::BuildStaticCodec(PrepareData(), info); - UNIT_ASSERT_VALUES_EQUAL(res.ShortUtf8DebugString(), - "StoredCodec: \"\\007\\000huffman@S\\000a" - "\\006b\\005c\\005d\\005e\\005f\\005g\\005h\\005i\\005j\\005k\\005l\\005m\\005n\\005o" - "\\005p\\005q\\005r\\005s\\005t\\005u\\004v\\004w\\004x\\004y\\004z\\004\xC7?\xC8>" - "\xC9=\xCA<\xCB;\xCC:\3159\3168\3177\3206\3215\3224\3233\3242\3251\3260\xD7/\xD8." - "\xD9-\xDA,\xDB+\xDC*\xDD)\xDE(\xDF\\'\xE0&\xE1%\xE2$\xE3#\xE4\\\"\xE5!\xE6 \xE7" - "\\037\xE8\\036\xE9\\035\xEA\\034\xEB\\033\xEC\\032\xED\\031\xEE\\030\xEF\\027\xF0" - "\\026\xF1\\025\xF2\\024\xF3\\023\xF4\\022\xF5\\021\xF6\\020\xF7\\017\xF8\\016\xF9" - "\\r\xFA\\014\xFB\\013\xFC\\n\xFD\\t\xFE\\010\xFF\\007\" " - "DebugInfo { " - "CodecName: \"huffman\" " - "Timestamp: 1467494385 " - "RevisionInfo: \"r2385905\" " - "SampleSizeMultiplier: 2 " - "TrainingSetComment: \"some dummy data\" " - "TrainingSetResId: \"sbr://1234\" " - "StoredCodecHash: 2509195835471488613 " - "}"); - - UNIT_ASSERT_VALUES_EQUAL(NCodecs::GetStandardFileName(res), "huffman.1467494385.codec_info"); - UNIT_ASSERT_VALUES_EQUAL(res.GetDebugInfo().GetStoredCodecHash(), 2509195835471488613ULL); - - auto res1 = NCodecs::LoadCodecInfoFromString(NCodecs::SaveCodecInfoToString(res)); - UNIT_ASSERT_VALUES_EQUAL(res1.ShortUtf8DebugString(), res.ShortUtf8DebugString()); - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TStaticCodecInfoBuilderTest); + NCodecs::TCodecBuildInfo info; + info.CodecName = "huffman"; + info.SampleSizeMultiplier = 2; + info.Timestamp = 1467494385; + info.RevisionInfo = "r2385905"; + info.TrainingSetComment = "some dummy data"; + info.TrainingSetResId = "sbr://1234"; + auto res = NCodecs::BuildStaticCodec(PrepareData(), info); + UNIT_ASSERT_VALUES_EQUAL(res.ShortUtf8DebugString(), + "StoredCodec: \"\\007\\000huffman@S\\000a" + "\\006b\\005c\\005d\\005e\\005f\\005g\\005h\\005i\\005j\\005k\\005l\\005m\\005n\\005o" + "\\005p\\005q\\005r\\005s\\005t\\005u\\004v\\004w\\004x\\004y\\004z\\004\xC7?\xC8>" + "\xC9=\xCA<\xCB;\xCC:\3159\3168\3177\3206\3215\3224\3233\3242\3251\3260\xD7/\xD8." + "\xD9-\xDA,\xDB+\xDC*\xDD)\xDE(\xDF\\'\xE0&\xE1%\xE2$\xE3#\xE4\\\"\xE5!\xE6 \xE7" + "\\037\xE8\\036\xE9\\035\xEA\\034\xEB\\033\xEC\\032\xED\\031\xEE\\030\xEF\\027\xF0" + "\\026\xF1\\025\xF2\\024\xF3\\023\xF4\\022\xF5\\021\xF6\\020\xF7\\017\xF8\\016\xF9" + "\\r\xFA\\014\xFB\\013\xFC\\n\xFD\\t\xFE\\010\xFF\\007\" " + "DebugInfo { " + "CodecName: \"huffman\" " + "Timestamp: 1467494385 " + "RevisionInfo: \"r2385905\" " + "SampleSizeMultiplier: 2 " + "TrainingSetComment: \"some dummy data\" " + "TrainingSetResId: \"sbr://1234\" " + "StoredCodecHash: 2509195835471488613 " + "}"); + + UNIT_ASSERT_VALUES_EQUAL(NCodecs::GetStandardFileName(res), "huffman.1467494385.codec_info"); + UNIT_ASSERT_VALUES_EQUAL(res.GetDebugInfo().GetStoredCodecHash(), 2509195835471488613ULL); + + auto res1 = NCodecs::LoadCodecInfoFromString(NCodecs::SaveCodecInfoToString(res)); + UNIT_ASSERT_VALUES_EQUAL(res1.ShortUtf8DebugString(), res.ShortUtf8DebugString()); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TStaticCodecInfoBuilderTest); diff --git a/library/cpp/codecs/static/ut/static_ut.cpp b/library/cpp/codecs/static/ut/static_ut.cpp index 57e1e62887..fda9ffcccb 100644 --- a/library/cpp/codecs/static/ut/static_ut.cpp +++ b/library/cpp/codecs/static/ut/static_ut.cpp @@ -1,27 +1,27 @@ #include <library/cpp/testing/unittest/registar.h> #include <library/cpp/codecs/static/example/example.h> - + class TStaticCodecUsageTest: public NUnitTest::TTestBase { - UNIT_TEST_SUITE(TStaticCodecUsageTest) + UNIT_TEST_SUITE(TStaticCodecUsageTest) UNIT_TEST(TestUsage) - UNIT_TEST_SUITE_END(); + UNIT_TEST_SUITE_END(); -private: - void DoTestUsage(NStaticCodecExample::EDictVersion dv, size_t expectedSize) { +private: + void DoTestUsage(NStaticCodecExample::EDictVersion dv, size_t expectedSize) { const TStringBuf letov = "Всё идёт по плану"; - - TBuffer outEnc, outDec; - NStaticCodecExample::Encode(outEnc, letov, dv); + + TBuffer outEnc, outDec; + NStaticCodecExample::Encode(outEnc, letov, dv); NStaticCodecExample::Decode(outDec, TStringBuf{outEnc.data(), outEnc.size()}); - - UNIT_ASSERT_VALUES_EQUAL(outEnc.Size(), expectedSize); + + UNIT_ASSERT_VALUES_EQUAL(outEnc.Size(), expectedSize); UNIT_ASSERT_EQUAL(TStringBuf(outDec.data(), outDec.size()), letov); - } - - void TestUsage() { - DoTestUsage(NStaticCodecExample::DV_HUFF_20160707, 18u); - DoTestUsage(NStaticCodecExample::DV_SA_HUFF_20160707, 22u); - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TStaticCodecUsageTest) + } + + void TestUsage() { + DoTestUsage(NStaticCodecExample::DV_HUFF_20160707, 18u); + DoTestUsage(NStaticCodecExample::DV_SA_HUFF_20160707, 22u); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TStaticCodecUsageTest) diff --git a/library/cpp/codecs/static/ut/ya.make b/library/cpp/codecs/static/ut/ya.make index b9116097d8..5bb2017fac 100644 --- a/library/cpp/codecs/static/ut/ya.make +++ b/library/cpp/codecs/static/ut/ya.make @@ -1,14 +1,14 @@ UNITTEST_FOR(library/cpp/codecs/static) - -OWNER(velavokr) - -SRCS( - builder_ut.cpp - static_ut.cpp -) - -PEERDIR( + +OWNER(velavokr) + +SRCS( + builder_ut.cpp + static_ut.cpp +) + +PEERDIR( library/cpp/codecs/static/example -) - -END() +) + +END() diff --git a/library/cpp/codecs/static/ya.make b/library/cpp/codecs/static/ya.make index 00e00fd8d4..a2698b9432 100644 --- a/library/cpp/codecs/static/ya.make +++ b/library/cpp/codecs/static/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - -OWNER(velavokr) - -SRCS( - builder.cpp - static_codec_info.proto - static.cpp -) - -PEERDIR( +LIBRARY() + +OWNER(velavokr) + +SRCS( + builder.cpp + static_codec_info.proto + static.cpp +) + +PEERDIR( library/cpp/codecs library/cpp/archive library/cpp/svnversion - util/draft -) - -END() + util/draft +) + +END() |