aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Functions/array/arraySplit.cpp
blob: e68d779f67e61570c22eb1fbbdf1d6b8f498b20c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>

#include "FunctionArrayMapped.h"


namespace DB
{
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}

template <bool reverse>
struct ArraySplitImpl
{
    static bool needBoolean() { return true; }
    static bool needExpression() { return true; }
    static bool needOneArray() { return false; }

    static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
    {
        return std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeArray>(array_element)
        );
    }

    static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
    {
        const ColumnUInt8 * column_cut = typeid_cast<const ColumnUInt8 *>(&*mapped);

        const IColumn::Offsets & in_offsets = array.getOffsets();
        auto column_offsets_2 = ColumnArray::ColumnOffsets::create();
        auto column_offsets_1 = ColumnArray::ColumnOffsets::create();
        IColumn::Offsets & out_offsets_2 = column_offsets_2->getData();
        IColumn::Offsets & out_offsets_1 = column_offsets_1->getData();

        if (column_cut)
        {
            const IColumn::Filter & cut = column_cut->getData();

            size_t pos = 0;

            out_offsets_2.reserve(in_offsets.size()); // assume the actual size to be equal or larger
            out_offsets_1.reserve(in_offsets.size());

            for (auto in_offset : in_offsets)
            {
                if (pos < in_offset)
                {
                    pos += !reverse;
                    for (; pos < in_offset - reverse; ++pos)
                        if (cut[pos])
                            out_offsets_2.push_back(pos + reverse);
                    pos += reverse;

                    out_offsets_2.push_back(pos);
                }

                out_offsets_1.push_back(out_offsets_2.size());
            }
        }
        else
        {
            const auto * column_cut_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);

            if (!column_cut_const)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of cut column");

            if (column_cut_const->getValue<UInt8>())
            {
                out_offsets_2.reserve(in_offsets.back());
                out_offsets_1.reserve(in_offsets.size());

                for (size_t i = 0; i < in_offsets.back(); ++i)
                    out_offsets_2.push_back(i + 1);
                for (auto in_offset : in_offsets)
                    out_offsets_1.push_back(in_offset);
            }
            else
            {
                size_t pos = 0;

                out_offsets_2.reserve(in_offsets.size());
                out_offsets_1.reserve(in_offsets.size());

                for (auto in_offset : in_offsets)
                {
                    if (pos < in_offset)
                    {
                        pos = in_offset;
                        out_offsets_2.push_back(pos);
                    }

                    out_offsets_1.push_back(out_offsets_2.size());
                }
            }
        }

        return ColumnArray::create(
            ColumnArray::create(
                array.getDataPtr(),
                std::move(column_offsets_2)
            ),
            std::move(column_offsets_1)
        );
    }
};

struct NameArraySplit { static constexpr auto name = "arraySplit"; };
struct NameArrayReverseSplit { static constexpr auto name = "arrayReverseSplit"; };
using FunctionArraySplit = FunctionArrayMapped<ArraySplitImpl<false>, NameArraySplit>;
using FunctionArrayReverseSplit = FunctionArrayMapped<ArraySplitImpl<true>, NameArrayReverseSplit>;

REGISTER_FUNCTION(ArraySplit)
{
    factory.registerFunction<FunctionArraySplit>();
    factory.registerFunction<FunctionArrayReverseSplit>();
}

}