// about | summary | refs | log | tree | commit | diff | stats
// path: root/library/cpp/codecs/static/tools/common/ct_common.cpp
// blob: ab4a2fac3f49ba781f83d69fddae617e4e3c0658 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include "ct_common.h"

#include <library/cpp/codecs/codecs.h>
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/string_utils/base64/base64.h>

#include <util/stream/output.h>
#include <util/string/builder.h>
#include <util/system/hp_timer.h>

namespace NCodecs {
    // Builds a multi-line, human-readable report of these statistics.
    //
    // info      - codec description; the size of its stored codec is printed as
    //             "dict size", and the compression value kept in its debug info
    //             feeds the "orig." rows.
    // checkMode - when true, the two "orig." rows are added to the report.
    //
    // Returns the report text, one "label: value" row per line.
    TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
        TStringBuilder report;

        // Emits a single row: fixed-width label, the value, an optional suffix, end of line.
        const auto row = [&report](TStringBuf label, const auto& value, TStringBuf suffix = TStringBuf()) {
            report << label << value << suffix << Endl;
        };

        const auto& debugInfo = info.GetDebugInfo();

        row("raw size/item:      ", RawSizePerRecord());
        row("enc.size/item:      ", EncSizePerRecord());
        if (checkMode) {
            row("orig.enc.size/item: ", OldEncSizePerRecord(debugInfo.GetCompression()));
        }
        row("enc time us/item:   ", EncTimePerRecordUS());
        row("dec time us/item:   ", DecTimePerRecordUS());
        row("dict size:          ", info.GetStoredCodec().Size());
        row("compression:        ", AsPercent(Compression()), " %");
        if (checkMode) {
            row("orig.compression:   ", AsPercent(debugInfo.GetCompression()), " %");
        }

        return report;
    }

    // Pushes every record of `input` through c.Encode followed by c.Decode and
    // accumulates the record count, raw and encoded byte sizes, and the time spent
    // encoding and decoding.
    //
    // Y_ENSURE fails with "invalid encoding at record <N>" when a decoded record
    // does not match the record it was produced from (N is the 1-based record index).
    TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
        TComprStats result;

        // Scratch buffers are shared by all iterations and cleared before each record.
        TBuffer encoded;
        TBuffer decoded;

        for (const TString& record : input) {
            encoded.Clear();
            decoded.Clear();

            ++result.Records;
            result.RawSize += record.size();

            THPTimer stopwatch;
            c.Encode(record, encoded);
            result.EncSize += encoded.size();
            result.EncSeconds += stopwatch.PassedReset();

            c.Decode(TStringBuf(encoded.data(), encoded.size()), decoded);
            result.DecSeconds += stopwatch.PassedReset();

            // The round-trip check happens after both timings have been taken.
            const TStringBuf roundTrip(decoded.data(), decoded.size());
            Y_ENSURE(record == roundTrip, "invalid encoding at record " << result.Records);
        }

        return result;
    }

    // Splits the contents of `blob` into lines and appends each non-empty record
    // to `result`.
    //
    // When fmt is DSF_BASE64_LF every line is Base64-decoded first; the emptiness
    // check is then applied to the decoded data. Existing elements of `result`
    // are left in place.
    void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
        const bool isBase64 = (DSF_BASE64_LF == fmt);

        // Reused across lines as the target of Base64 decoding.
        TString decoded;
        TStringBuf remaining(blob.AsCharPtr(), blob.Size());

        for (TStringBuf record; remaining.ReadLine(record);) {
            if (isBase64) {
                Base64Decode(record, decoded);
                record = decoded;
            }
            if (!record.empty()) {
                result.emplace_back(record.data(), record.size());
            }
        }
    }

    // Loads the input data as a blob: from the file named by `dataFile`, or from
    // Cin when `dataFile` is empty or equals "-".
    TBlob GetInputBlob(const TString& dataFile) {
        const bool readFromStdin = dataFile.empty() || dataFile == "-";
        if (readFromStdin) {
            return TBlob::FromStream(Cin);
        }
        return TBlob::FromFile(dataFile);
    }

}