aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp
blob: cd4603ddaecef51965df00b9ddaf79ae2339f305 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <Parsers/CommonParsers.h>
#include <Common/quoteString.h>

#include <utility>

namespace DB
{

/// Checks that nothing (e.g. no whitespace) lies between two positions of the query.
/// Callers in this file pass the end of the previous token as `to`
/// and the beginning of the current token as `from`.
static bool noWhitespaces(const char * to, const char * from)
{
    return from == to;
}

/// Tells whether a token of this type has to be quoted together with an adjacent
/// unrecognized token: true for bare words and numbers only.
static bool isWordOrNumber(TokenType type)
{
    if (type == TokenType::BareWord)
        return true;
    return type == TokenType::Number;
}

/// Appends to `rewritten_query` the not yet copied part of the original query
/// (from `copy_from` up to `pos_unrecognized`) followed by the unrecognized fragment
/// quoted with backQuoteMySQL.
/// On return `copy_from` points right past the quoted fragment.
static void quoteLiteral(
    IParser::Pos & pos,
    IParser::Pos & pos_prev,
    const char *& pos_unrecognized,
    const char *& copy_from,
    String & rewritten_query)
{
    /// The fragment stops at the end of the previous token, unless the current token
    /// is a word or a number glued to it (no gap in between): then it is quoted as well.
    const char * literal_end = pos_prev->end;
    if (isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin))
        literal_end = pos->end;

    const String literal(pos_unrecognized, static_cast<size_t>(literal_end - pos_unrecognized));

    /// Copy also whitespaces if any: everything before the fragment goes through unchanged
    rewritten_query.append(copy_from, pos_unrecognized - copy_from);
    rewritten_query.append(backQuoteMySQL(literal));

    copy_from = literal_end;
}

bool tryQuoteUnrecognizedTokens(const String & query, String & res)
{
    Tokens tokens(query.data(), query.data() + query.size());
    IParser::Pos pos(tokens, 0);
    Expected expected;
    String rewritten_query;
    const char * copy_from = query.data();
    auto pos_prev = pos;
    const char * pos_unrecognized = nullptr;
    for (;pos->type != TokenType::EndOfStream; ++pos)
    {
        /// Commit quotes if any whitespaces found or the token is not a word
        bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type));
        if (pos_unrecognized && commit)
        {
            quoteLiteral(
                pos,
                pos_prev,
                pos_unrecognized,
                copy_from,
                rewritten_query);
            pos_unrecognized = nullptr;
        }
        if (pos->type == TokenType::Error)
        {
            /// Find first appearance of the error token
            if (!pos_unrecognized)
            {
                pos_unrecognized =
                    isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin)
                    ? pos_prev->begin
                    : pos->begin;
            }
        }
        pos_prev = pos;
    }

    /// There was EndOfStream but not committed unrecognized token
    if (pos_unrecognized)
    {
        quoteLiteral(
            pos,
            pos_prev,
            pos_unrecognized,
            copy_from,
            rewritten_query);
        pos_unrecognized = nullptr;
    }

    /// If no Errors found
    if (copy_from == query.data())
        return false;

    auto size = static_cast<size_t>(pos->end - copy_from);
    rewritten_query.append(copy_from, size);
    res = rewritten_query;
    return true;
}

}