aboutsummaryrefslogtreecommitdiffstats
path: root/yt/yt/library/column_converters/integer_column_converter.cpp
blob: 6b3a2c6cb0c08eaaac1220872166950dc99e691a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include "integer_column_converter.h"

#include "helpers.h"

#include <yt/yt/client/table_client/schema.h>
#include <yt/yt/client/table_client/unversioned_row.h>

#include <library/cpp/yt/coding/zig_zag.h>

namespace NYT::NColumnConverters {

////////////////////////////////////////////////////////////////////////////////

namespace {

// Maps a signed value onto the unsigned representation stored in the column
// by applying ZigZagEncode64.
ui64 EncodeValue(i64 value)
{
    const ui64 encoded = ZigZagEncode64(value);
    return encoded;
}

// Unsigned overload: the value is stored as is, no transformation is applied.
ui64 EncodeValue(ui64 value)
{
    const ui64 encoded = value;
    return encoded;
}

// Extracts the Data.Int64 field of #value.
// This overload participates in overload resolution only for signed TValue.
template <class TValue>
std::enable_if_t<std::is_signed_v<TValue>, TValue>
GetValue(const NTableClient::TUnversionedValue& value)
{
    const auto& payload = value.Data;
    return payload.Int64;
}

// Extracts the Data.Uint64 field of #value.
// This overload participates in overload resolution only for unsigned TValue.
template <class TValue>
std::enable_if_t<std::is_unsigned_v<TValue>, TValue>
GetValue(const NTableClient::TUnversionedValue& value)
{
    const auto& payload = value.Data;
    return payload.Uint64;
}

////////////////////////////////////////////////////////////////////////////////

// Describes a run of 64-bit integer values in #column.
//
// Sets the row range (#startIndex, #valueCount) on the column and emplaces a
// fresh values descriptor pointing at #data with #baseValue as its base.
// The descriptor is marked zig-zag encoded iff #valueType is Int64.
void FillColumnarIntegerValues(
    NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
    i64 startIndex,
    i64 valueCount,
    NTableClient::EValueType valueType,
    ui64 baseValue,
    TRef data)
{
    // Only signed columns go through zig-zag encoding (see EncodeValue).
    const bool isSigned = valueType == NTableClient::EValueType::Int64;

    column->StartIndex = startIndex;
    column->ValueCount = valueCount;

    auto& integerValues = column->Values.emplace();
    integerValues.Data = data;
    integerValues.BitWidth = 64;
    integerValues.BaseValue = baseValue;
    integerValues.ZigZagEncoded = isSigned;
}

////////////////////////////////////////////////////////////////////////////////

// Converts one integer column of row values into a columnar batch column.
//
// TValue - i64 or ui64.
//
// Every non-null value is encoded into ui64 (see EncodeValue) and stored as
// an offset from the smallest encoded value seen in the batch; that smallest
// value is published as the column's base value. Null rows keep a zero entry
// in the value buffer and are flagged in the null bitmap.
template <class TValue>
class TIntegerColumnConverter
    : public IColumnConverter
{
public:
    static_assert(std::is_integral_v<TValue>);

    // #columnIndex is both the index used to pick the value out of each row
    // and the id assigned to the produced column.
    TIntegerColumnConverter(
        int columnIndex,
        NTableClient::EValueType ValueType,
        NTableClient::TColumnSchema columnSchema)
        : ColumnIndex_(columnIndex)
        , ColumnSchema_(std::move(columnSchema))
        , ValueType_(ValueType)
    { }

    // Builds the column for #rowsValues. Any state left over from a previous
    // call is discarded first, so the converter may be reused.
    TConvertedColumn Convert(TRange<TUnversionedRowValues> rowsValues) override
    {
        Reset();
        AddValues(rowsValues);

        // Rebase non-null entries against the batch minimum. Null entries are
        // skipped: they hold a zero placeholder which may be below MinValue_.
        for (i64 index = 0; index < std::ssize(Values_); ++index) {
            if (!NullBitmap_[index]) {
                Values_[index] -= MinValue_;
            }
        }

        auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>();
        auto valuesRef = TSharedRef::MakeCopy<TConverterTag>(TRef(Values_.data(), sizeof(ui64) * Values_.size()));
        auto column = std::make_shared<TBatchColumn>();

        FillColumnarIntegerValues(
            column.get(),
            0,
            RowCount_,
            ValueType_,
            MinValue_,
            valuesRef);

        FillColumnarNullBitmap(
            column.get(),
            0,
            RowCount_,
            nullBitmapRef);

        column->Type = ColumnSchema_.LogicalType();
        column->Id = ColumnIndex_;

        // The owner keeps the column object and both buffers it refers to.
        TOwningColumn owner = {
            .Column = std::move(column),
            .NullBitmap = std::move(nullBitmapRef),
            .ValueBuffer = std::move(valuesRef),
        };

        return {{owner}, owner.Column.get()};
    }


private:
    const int ColumnIndex_;
    const NTableClient::TColumnSchema ColumnSchema_;
    const NTableClient::EValueType ValueType_;

    i64 RowCount_ = 0;
    TBitmapOutput NullBitmap_;
    std::vector<ui64> Values_;

    // Range of encoded non-null values in the current batch.
    // If the batch has no non-null values, these keep their reset state
    // (MaxValue_ == 0, MinValue_ == max ui64).
    // MaxValue_ is maintained together with MinValue_ but is not read by Convert.
    ui64 MaxValue_ = 0;
    ui64 MinValue_ = std::numeric_limits<ui64>::max();

    // Drops all per-batch state.
    void Reset()
    {
        Values_.clear();
        RowCount_ = 0;
        MaxValue_ = 0;
        MinValue_ = std::numeric_limits<ui64>::max();
        // The flushed bitmap is intentionally dropped; the call is made only
        // to discard bits accumulated by a previous batch.
        NullBitmap_.Flush<TConverterTag>();
    }

    // Appends one entry per row: the encoded value (or zero for null) goes to
    // Values_, the null flag goes to NullBitmap_. A row counts as null if the
    // value is missing or has the Null type.
    void AddValues(TRange<TUnversionedRowValues> rowsValues)
    {
        for (const auto& rowValues : rowsValues) {
            auto value = rowValues[ColumnIndex_];
            bool isNull = !value || value->Type == NTableClient::EValueType::Null;
            ui64 data = 0;
            if (!isNull) {
                YT_VERIFY(value);
                data = EncodeValue(GetValue<TValue>(*value));
                // Track the encoded range so that Convert subtracts the real
                // minimum rather than the reset sentinel.
                if (data < MinValue_) {
                    MinValue_ = data;
                }
                if (data > MaxValue_) {
                    MaxValue_ = data;
                }
            }
            Values_.push_back(data);
            NullBitmap_.Append(isNull);
            ++RowCount_;
        }
    }
};

} // namespace

////////////////////////////////////////////////////////////////////////////////

// Creates a converter for a signed 64-bit integer column located at
// #columnIndex and described by #columnSchema.
IColumnConverterPtr CreateInt64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
{
    using TConverter = TIntegerColumnConverter<i64>;
    return std::make_unique<TConverter>(
        columnIndex,
        NTableClient::EValueType::Int64,
        columnSchema);
}


// Creates a converter for an unsigned 64-bit integer column located at
// #columnIndex and described by #columnSchema.
IColumnConverterPtr CreateUint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
{
    using TConverter = TIntegerColumnConverter<ui64>;
    return std::make_unique<TConverter>(
        columnIndex,
        NTableClient::EValueType::Uint64,
        columnSchema);
}

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT::NColumnConverters