summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Functions/array/length.cpp
blob: 91a5e5fdec2dd89ad07081ffce993dcda40712fc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>


namespace DB
{
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

/** Per-column kernels behind the `length` function.
  * For strings the result is a size in bytes; for arrays it is the element count.
  * UUID, IPv4 and IPv6 inputs are rejected with ILLEGAL_TYPE_OF_ARGUMENT.
  */
struct LengthImpl
{
    /// A fixed-size string input yields one constant (see vectorFixedToConstant),
    /// so vectorFixedToVector below is intentionally left empty.
    static constexpr auto is_fixed_to_constant = true;

    /// String column: row length = distance between neighbouring offsets, minus one
    /// (presumably the terminating byte stored after each string — confirm).
    /// Row 0 reads offsets[-1] exactly as before.
    /// NOTE(review): relies on the offsets container exposing a readable zero at index -1 — confirm.
    static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res)
    {
        const size_t rows = offsets.size();
        for (size_t row = 0; row != rows; ++row)
        {
            const auto row_begin = offsets[row - 1];
            const auto row_end = offsets[row];
            res[row] = row_end - 1 - row_begin;
        }
    }

    /// Fixed-size string column: every row has the same length n.
    static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res)
    {
        res = n;
    }

    /// Does nothing; see is_fixed_to_constant.
    static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray<UInt64> & /*res*/)
    {
    }

    /// Array column: row length = distance between neighbouring offsets.
    /// Row 0 reads offsets[-1], same caveat as in vector().
    static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res)
    {
        const size_t rows = offsets.size();
        for (size_t row = 0; row != rows; ++row)
        {
            const auto row_begin = offsets[row - 1];
            const auto row_end = offsets[row];
            res[row] = row_end - row_begin;
        }
    }

    /// The three overloads below always throw: `length` is not defined for these types.

    [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &)
    {
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Cannot apply function length to UUID argument");
    }

    [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &)
    {
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Cannot apply function length to IPv6 argument");
    }

    [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &)
    {
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Cannot apply function length to IPv4 argument");
    }
};


/// Name tag type: exposes the function name string "length" as a compile-time constant.
struct NameLength
{
    static constexpr const char * name = "length";
};

/// The concrete `length` function: the FunctionStringOrArrayToT template instantiated with
/// LengthImpl as the implementation, NameLength as the name tag and UInt64 as the third argument
/// (the element type LengthImpl writes its results into).
/// NOTE(review): the meaning of the trailing `false` template argument is defined in
/// FunctionStringOrArrayToT.h and is not visible here — confirm against that header.
using FunctionLength = FunctionStringOrArrayToT<LengthImpl, NameLength, UInt64, false>;

/// Registers FunctionLength in the function factory as case-insensitive, together with its
/// embedded documentation, and adds the case-insensitive alias OCTET_LENGTH for `length`.
/// Each `[example:<name>]` marker in the description refers to the entry with the same name in `.examples`.
REGISTER_FUNCTION(Length)
{
    factory.registerFunction<FunctionLength>(
        FunctionDocumentation{
            .description=R"(
Calculates the length of the string or array.

For String or FixedString argument: calculates the number of bytes in string.
[example:string1]

For Array argument: calculates the number of elements in the array.
[example:arr1]

If applied for FixedString argument, the function is a constant expression:
[example:constexpr]

Please note that the number of bytes in a string is not the same as the number of Unicode "code points"
and it is not the same as the number of Unicode "grapheme clusters" (what we usually call "characters")
and it is not the same as the visible string width.
[example:unicode]

It is ok to have ASCII NUL bytes in strings, and they will be counted as well.
[example:nul]
)",
            .examples{
                {"string1", "SELECT length('Hello, world!')", ""},
                /// `length` takes a single argument, so the example passes one two-element array.
                {"arr1", "SELECT length(['Hello', 'world'])", ""},
                {"constexpr", "WITH 'hello' || toString(number) AS str\n"
                              "SELECT str, \n"
                              "       isConstant(length(str)) AS str_length_is_constant, \n"
                              "       isConstant(length(str::FixedString(6))) AS fixed_str_length_is_constant\n"
                              "FROM numbers(3)", ""},
                {"unicode", "SELECT 'ёлка' AS str1, length(str1), lengthUTF8(str1), normalizeUTF8NFKD(str1) AS str2, length(str2), lengthUTF8(str2)", ""},
                {"nul", R"(SELECT 'abc\0\0\0' AS str, length(str))", ""},
                },
            .categories{"String", "Array"}
        },
        FunctionFactory::CaseInsensitive);
    factory.registerAlias("OCTET_LENGTH", "length", FunctionFactory::CaseInsensitive);
}

}