diff options
| author | vitalyisaev <[email protected]> | 2023-11-14 09:58:56 +0300 |
|---|---|---|
| committer | vitalyisaev <[email protected]> | 2023-11-14 10:20:20 +0300 |
| commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
| tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Functions/JSONPath/Parsers | |
| parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Functions/JSONPath/Parsers')
14 files changed, 483 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp new file mode 100644 index 00000000000..003e97af38b --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp @@ -0,0 +1,31 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPath.h> +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPath.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> + +namespace DB +{ +/** + * Entry parser for JSONPath: runs ParserJSONPathQuery and, on success, attaches the parsed query to a new ASTJSONPath. parseImpl assigns node even when it returns false. + */ +bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto ast_jsonpath = std::make_shared<ASTJSONPath>(); + ParserJSONPathQuery parser_jsonpath_query; + + /// Receives the AST produced by ParserJSONPathQuery + ASTPtr query; + + bool res = parser_jsonpath_query.parse(pos, query, expected); + + if (res) + { + /// Attach the parsed query as the jsonpath_query of the ASTJSONPath node + ast_jsonpath->set(ast_jsonpath->jsonpath_query, query); + } + + node = ast_jsonpath; + return res; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h new file mode 100644 index 00000000000..7d2c2ad642c --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h @@ -0,0 +1,21 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** + * Entry parser for JSONPath expressions (an IParserBase implementation) + */ +class ParserJSONPath : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPath"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPath() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp new file mode 100644 index 00000000000..141f25bfe4c --- 
/dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -0,0 +1,85 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h> + +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTIdentifier_fwd.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/Lexer.h> +#include <Common/StringUtils/StringUtils.h> + +namespace DB +{ +/** + * Parses a dot-style member access: a Dot token followed by a BareWord or QuotedIdentifier, or a Number token (digits with an optional leading dot) optionally followed by an adjacent BareWord. + * @param pos token iterator + * @param node receives the ASTJSONPathMemberAccess node + * @param expected forwarded to ParserIdentifier + * @return true if a member access was parsed and its name could be extracted + */ +bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // Special case: a path member can begin with a number. + // Such input is tokenized as follows: + // - ".123" is parsed as a single number, not as a dot and a number + // - ".123abc" is parsed as two parts: a number ".123" and a token "abc" + // - ".abc" is parsed as two parts: a dot and a token "abc" + // - "$..123abc" is parsed into three parts: ".", ".123" and "abc" + if (pos->type != TokenType::Dot && pos->type != TokenType::Number) + return false; + if (pos->type != TokenType::Number) + { + ++pos; + // Reject a number token directly after a dot, e.g. "$..123abc" + if (pos->type == TokenType::Number) + { + return false; + } + } + + ASTPtr member_name; + + if (pos->type == TokenType::Number)[[unlikely]] + { + for (const auto * c = pos->begin; c != pos->end; ++c) + { + if (*c == '.' && c == pos->begin) + continue; + if (!isNumericASCII(*c)) + { + return false; + } + } + const auto * last_begin = *pos->begin == '.' ? 
pos->begin + 1 : pos->begin; + const auto * last_end = pos->end; + ++pos; + + if (pos.isValid() && pos->type == TokenType::BareWord && pos->begin == last_end) + { + member_name = std::make_shared<ASTIdentifier>(String(last_begin, pos->end)); + ++pos; + } + else if (!pos.isValid() && pos->type == TokenType::EndOfStream) + { + member_name = std::make_shared<ASTIdentifier>(String(last_begin, last_end)); + } + else + { + return false; + } + } + else + { + if (pos->type != TokenType::BareWord && pos->type != TokenType::QuotedIdentifier) + return false; + + ParserIdentifier name_p; + if (!name_p.parse(pos, member_name, expected)) + return false; + } + + auto member_access = std::make_shared<ASTJSONPathMemberAccess>(); + node = member_access; + return tryGetIdentifierNameInto(member_name, member_access->member_name); +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h new file mode 100644 index 00000000000..b28bf37d5ef --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h @@ -0,0 +1,14 @@ +#pragma once + +#include <Parsers/IParserBase.h> + +namespace DB +{ +class ParserJSONPathMemberAccess : public IParserBase +{ + const char * getName() const override { return "ParserJSONPathMemberAccess"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp new file mode 100644 index 00000000000..93e0639ccfe --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp @@ -0,0 +1,44 @@ +#include "ParserJSONPathMemberSquareBracketAccess.h" +#include <memory> +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include 
<IO/ReadBufferFromMemory.h> +#include <IO/ReadHelpers.h> +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ExpressionElementParsers.h> + +namespace DB +{ +bool ParserJSONPathMemberSquareBracketAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningSquareBracket) + return false; + ++pos; + ASTPtr member_name; + if (pos->type == TokenType::BareWord || pos->type == TokenType::QuotedIdentifier) + { + ParserIdentifier name_p; + if (!name_p.parse(pos, member_name, expected)) + return false; + } + else if (pos->type == TokenType::StringLiteral) + { + ReadBufferFromMemory in(pos->begin, pos->size()); + String name; + readQuotedStringWithSQLStyle(name, in); + member_name = std::make_shared<ASTIdentifier>(name); + ++pos; + } + else + { + return false; + } + if (pos->type != TokenType::ClosingSquareBracket) + { + return false; + } + ++pos; + auto member_access = std::make_shared<ASTJSONPathMemberAccess>(); + node = member_access; + return tryGetIdentifierNameInto(member_name, member_access->member_name); +} +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h new file mode 100644 index 00000000000..b682ec5bb96 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h @@ -0,0 +1,17 @@ +#pragma once +#include <Parsers/IParserBase.h> +// Parser for a member access written in square brackets; the name may be a bare word, a quoted identifier or a string literal. Cases: +// - [ident] +// - ['ident'] +// - ["ident"] +namespace DB +{ +class ParserJSONPathMemberSquareBracketAccess : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathMemberSquareBracketAccess"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserJSONPathMemberSquareBracketAccess() = default; +}; +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp 
b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp new file mode 100644 index 00000000000..d8d633a1ec9 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp @@ -0,0 +1,51 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h> + +namespace DB + +{ +/** + * Parses a root followed by any number of accessors (member access, bracket member access, range, star) and collects them as children of a new ASTJSONPathQuery. + * @param pos token iterator + * @param query receives the ASTJSONPathQuery node + * @param expected forwarded to the sub-parsers + * @return true only if the root was parsed and the token after the last accessor is EndOfStream + */ +bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected) +{ + query = std::make_shared<ASTJSONPathQuery>(); + ParserJSONPathMemberAccess parser_jsonpath_member_access; + ParserJSONPathMemberSquareBracketAccess parser_jsonpath_member_square_bracket_access; + ParserJSONPathRange parser_jsonpath_range; + ParserJSONPathStar parser_jsonpath_star; + ParserJSONPathRoot parser_jsonpath_root; + + ASTPtr path_root; + if (!parser_jsonpath_root.parse(pos, path_root, expected)) + { + return false; + } + query->children.push_back(path_root); + + ASTPtr accessor; + while (parser_jsonpath_member_access.parse(pos, accessor, expected) + || parser_jsonpath_member_square_bracket_access.parse(pos, accessor, expected) + || parser_jsonpath_range.parse(pos, accessor, expected) + || parser_jsonpath_star.parse(pos, accessor, expected)) + { + if (accessor) + { + query->children.push_back(accessor); + accessor = nullptr; + } + } + /// Parsing was successful if we reached the end of the query by this point + return pos->type == TokenType::EndOfStream; +} + +} diff --git 
a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h new file mode 100644 index 00000000000..fbe7321562e --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathQuery : public IParserBase +{ +protected: + const char * getName() const override { return "ParserJSONPathQuery"; } + bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override; +}; +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp new file mode 100644 index 00000000000..03c006774c0 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -0,0 +1,94 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h> + +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/CommonParsers.h> + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} +/** + * Parses a bracketed list of single indices ("[1]") or ranges ("[1 TO 3]"); throws BAD_ARGUMENTS when the start of a range is not less than its end. + * @param pos token iterator + * @param node receives the ASTJSONPathRange node + * @param expected forwarded to the number and keyword parsers + * @return was parse successful + */ +bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + + if (pos->type != TokenType::OpeningSquareBracket) + { + return false; + } + ++pos; + + auto range = std::make_shared<ASTJSONPathRange>(); + node = range; + + ParserNumber number_p; + ASTPtr number_ptr; + while (pos->type != TokenType::ClosingSquareBracket) + { + if (pos->type != TokenType::Number) + { + return false; + } + + std::pair<UInt32, UInt32> range_indices; + if (!number_p.parse(pos, number_ptr, 
expected)) + { + return false; + } + range_indices.first = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>()); + + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket) + { + /// Single index case: stored as the pair (index, index+1) + range_indices.second = range_indices.first + 1; + } + else if (pos->type == TokenType::BareWord) + { + if (!ParserKeyword("TO").ignore(pos, expected)) + { + return false; + } + if (!number_p.parse(pos, number_ptr, expected)) + { + return false; + } + range_indices.second = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>()); + } + else + { + return false; + } + + if (range_indices.first >= range_indices.second) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Start of range must be less than end of range, however {} >= {}", + range_indices.first, + range_indices.second); + } + + range->ranges.push_back(std::move(range_indices)); + if (pos->type != TokenType::ClosingSquareBracket) + { + ++pos; + } + } + ++pos; + + /// We can't have both ranges and star present, so parse was successful <=> exactly 1 of these conditions is true (is_star is not assigned anywhere in this function) + return !range->ranges.empty() ^ range->is_star; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h new file mode 100644 index 00000000000..94db29577ab --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathRange : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRange"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRange() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp new 
file mode 100644 index 00000000000..86cf793fb52 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp @@ -0,0 +1,27 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h> + +#include <Parsers/Lexer.h> + +namespace DB +{ +/** + * Parses the root of a JSONPath: a single DollarSign token. + * @param pos token iterator + * @param node set to a new ASTJSONPathRoot on success + * @param expected records "dollar sign (start of jsonpath)" when the current token is not a dollar sign + * @return true if a dollar sign was consumed + */ +bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::DollarSign) + { + expected.add(pos, "dollar sign (start of jsonpath)"); + return false; + } + node = std::make_shared<ASTJSONPathRoot>(); + ++pos; + return true; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h new file mode 100644 index 00000000000..59fed28d63e --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathRoot : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRoot"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRoot() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp new file mode 100644 index 00000000000..1338a2064f1 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp @@ -0,0 +1,31 @@ +#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h> + +#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h> + +namespace DB +{ +bool ParserJSONPathStar::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != 
TokenType::OpeningSquareBracket) + { + return false; + } + ++pos; + if (pos->type != TokenType::Asterisk) + { + return false; + } + ++pos; + if (pos->type != TokenType::ClosingSquareBracket) + { + expected.add(pos, "Closing square bracket"); + return false; + } + ++pos; + + node = std::make_shared<ASTJSONPathStar>(); + + return true; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h new file mode 100644 index 00000000000..543823357de --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathStar : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathStar"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathStar() = default; +}; + +} |
