aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/codecs/static/tools/common/ct_common.cpp
blob: ded3a503ff780c70c9a1cb161fe8ee4c8d7db845 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include "ct_common.h"

#include <library/cpp/codecs/codecs.h>
#include <library/cpp/codecs/static/static_codec_info.pb.h>
#include <library/cpp/string_utils/base64/base64.h>

#include <util/stream/output.h>
#include <util/string/builder.h>
#include <util/system/hp_timer.h>

namespace NCodecs {
    // Renders the collected statistics as a multi-line, human-readable report.
    // Each metric goes on its own "label: value" line. When checkMode is set, two
    // additional "orig." lines are emitted, both derived from the compression value
    // stored in info's debug info, so they can be compared with the measured ones.
    TString TComprStats::Format(const TStaticCodecInfo& info, bool checkMode) const {
        TStringBuilder report;

        // Per-record sizes.
        report << "raw size/item:      " << RawSizePerRecord() << Endl;
        report << "enc.size/item:      " << EncSizePerRecord() << Endl;
        if (checkMode) {
            const auto& debugInfo = info.GetDebugInfo();
            report << "orig.enc.size/item: " << OldEncSizePerRecord(debugInfo.GetCompression()) << Endl;
        }

        // Per-record timings, in microseconds.
        report << "enc time us/item:   " << EncTimePerRecordUS() << Endl;
        report << "dec time us/item:   " << DecTimePerRecordUS() << Endl;

        // Size of the stored codec and the resulting compression.
        report << "dict size:          " << info.GetStoredCodec().Size() << Endl;
        report << "compression:        " << AsPercent(Compression()) << " %" << Endl;
        if (checkMode) {
            const auto& debugInfo = info.GetDebugInfo();
            report << "orig.compression:   " << AsPercent(debugInfo.GetCompression()) << " %" << Endl;
        }

        return report;
    }

    // Round-trips every record of input through c.Encode and c.Decode and gathers
    // statistics: number of records, total raw and encoded sizes, and the time
    // spent encoding and decoding. Fails via Y_ENSURE when a decoded record does
    // not match the original; the message carries the running record count.
    TComprStats TestCodec(const ICodec& c, const TVector<TString>& input) {
        TComprStats result;

        // The buffers are declared once and only cleared between records.
        TBuffer encoded;
        TBuffer decoded;
        for (const auto& record : input) {
            encoded.Clear();
            decoded.Clear();

            ++result.Records;
            result.RawSize += record.size();

            // The same timer is reset after encoding so that the second
            // reading covers the decoding step only.
            THPTimer stopwatch;
            c.Encode(record, encoded);
            result.EncSize += encoded.size();
            result.EncSeconds += stopwatch.PassedReset();

            const TStringBuf encodedView{encoded.data(), encoded.size()};
            c.Decode(encodedView, decoded);
            result.DecSeconds += stopwatch.PassedReset();

            // The comparison happens after the timer reading, so it is not timed.
            const TStringBuf roundTripped(decoded.data(), decoded.size());
            Y_ENSURE(record == roundTripped, "invalid encoding at record " << result.Records);
        }

        return result;
    }

    // Splits the contents of blob into lines and appends each non-empty record to
    // result. With fmt == DSF_BASE64_LF every line is base64-decoded first; a line
    // that is empty (after decoding, when decoding applies) is skipped.
    void ParseBlob(TVector<TString>& result, EDataStreamFormat fmt, const TBlob& blob) {
        const bool isBase64 = fmt == DSF_BASE64_LF;

        TStringBuf remaining(blob.AsCharPtr(), blob.Size());
        TString decoded; // scratch storage for base64 output, reused for every line
        for (TStringBuf record; remaining.ReadLine(record);) {
            if (isBase64) {
                Base64Decode(record, decoded);
                // From here on the record refers to the decoded bytes.
                record = decoded;
            }
            if (!record.empty()) {
                result.emplace_back(record.data(), record.size());
            }
        }
    }

    // Returns the input data as a blob: loaded from dataFile when a file name is
    // given, or read from Cin when dataFile is empty or equals "-".
    TBlob GetInputBlob(const TString& dataFile) {
        const bool useStdin = dataFile.empty() || dataFile == "-";
        if (useStdin) {
            return TBlob::FromStream(Cin);
        }
        return TBlob::FromFile(dataFile);
    }

}