diff options
| author | vitalyisaev <[email protected]> | 2023-11-14 09:58:56 +0300 |
|---|---|---|
| committer | vitalyisaev <[email protected]> | 2023-11-14 10:20:20 +0300 |
| commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
| tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Functions/JSONPath | |
| parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Functions/JSONPath')
29 files changed, 1047 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPath.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPath.h new file mode 100644 index 00000000000..dfc117db846 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPath.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h> +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPath : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPath"; } + + ASTPtr clone() const override { return std::make_shared<ASTJSONPath>(*this); } + + ASTJSONPathQuery * jsonpath_query; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h new file mode 100644 index 00000000000..3a5e121b989 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPathMemberAccess : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathMemberAccess"; } + + ASTPtr clone() const override { return std::make_shared<ASTJSONPathMemberAccess>(*this); } + + /// Member name to lookup in json document (in path: $.some_key.another_key. ...) 
+ String member_name; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h new file mode 100644 index 00000000000..ed2992777b2 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathQuery.h @@ -0,0 +1,15 @@ +#pragma once + +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPathQuery : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathQuery"; } + + ASTPtr clone() const override { return std::make_shared<ASTJSONPathQuery>(*this); } +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h new file mode 100644 index 00000000000..083d4b8e3ab --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h @@ -0,0 +1,22 @@ +#pragma once + +#include <vector> +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPathRange : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathRange"; } + + ASTPtr clone() const override { return std::make_shared<ASTJSONPathRange>(*this); } + + /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9]) + /// Range is represented as <start, end (non-inclusive)> + /// Single index is represented as <start, start + 1> + std::vector<std::pair<UInt32, UInt32>> ranges; + bool is_star = false; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h new file mode 100644 index 00000000000..1c6469c5b75 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathRoot.h @@ -0,0 +1,15 @@ +#pragma once + +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPathRoot : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathRoot"; } + + ASTPtr clone() const override { return 
std::make_shared<ASTJSONPathRoot>(*this); } +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h new file mode 100644 index 00000000000..2aada47c459 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/ASTs/ASTJSONPathStar.h @@ -0,0 +1,15 @@ +#pragma once + +#include <Parsers/IAST.h> + +namespace DB +{ +class ASTJSONPathStar : public IAST +{ +public: + String getID(char) const override { return "ASTJSONPathStar"; } + + ASTPtr clone() const override { return std::make_shared<ASTJSONPathStar>(*this); } +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/GeneratorJSONPath.h new file mode 100644 index 00000000000..be02656b07d --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/GeneratorJSONPath.h @@ -0,0 +1,128 @@ +#pragma once + +#include <Functions/JSONPath/Generator/IGenerator.h> +#include <Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h> +#include <Functions/JSONPath/Generator/VisitorJSONPathRange.h> +#include <Functions/JSONPath/Generator/VisitorJSONPathRoot.h> +#include <Functions/JSONPath/Generator/VisitorJSONPathStar.h> +#include <Functions/JSONPath/Generator/VisitorStatus.h> + +#include <Functions/JSONPath/ASTs/ASTJSONPath.h> + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template <typename JSONParser> +class GeneratorJSONPath : public IGenerator<JSONParser> +{ +public: + /** + * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors + * @param query_ptr_ pointer to ASTJSONPathQuery + */ + explicit GeneratorJSONPath(ASTPtr query_ptr_) + { + query_ptr = query_ptr_; + const auto * path = query_ptr->as<ASTJSONPath>(); + if (!path) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path"); + } + const auto * query = path->jsonpath_query; + + for (auto 
child_ast : query->children) + { + if (typeid_cast<ASTJSONPathRoot *>(child_ast.get())) + { + visitors.push_back(std::make_shared<VisitorJSONPathRoot<JSONParser>>(child_ast)); + } + else if (typeid_cast<ASTJSONPathMemberAccess *>(child_ast.get())) + { + visitors.push_back(std::make_shared<VisitorJSONPathMemberAccess<JSONParser>>(child_ast)); + } + else if (typeid_cast<ASTJSONPathRange *>(child_ast.get())) + { + visitors.push_back(std::make_shared<VisitorJSONPathRange<JSONParser>>(child_ast)); + } + else if (typeid_cast<ASTJSONPathStar *>(child_ast.get())) + { + visitors.push_back(std::make_shared<VisitorJSONPathStar<JSONParser>>(child_ast)); + } + } + } + + const char * getName() const override { return "GeneratorJSONPath"; } + + /** + * This method exposes API of traversing all paths, described by JSONPath, + * to SQLJSON Functions. + * Expected usage is to iteratively call this method from inside the function + * and to execute custom logic with received element or handle an error. + * On each such call getNextItem will yield next item into element argument + * and modify its internal state to prepare for next call. 
+ * + * @param element root of JSON document + * @return is the generator exhausted + */ + VisitorStatus getNextItem(typename JSONParser::Element & element) override + { + while (true) + { + /// element passed to us actually is root, so here we assign current to root + auto current = element; + if (current_visitor < 0) + { + return VisitorStatus::Exhausted; + } + + for (int i = 0; i < current_visitor; ++i) + { + visitors[i]->apply(current); + } + + VisitorStatus status = VisitorStatus::Error; + for (size_t i = current_visitor; i < visitors.size(); ++i) + { + status = visitors[i]->visit(current); + current_visitor = static_cast<int>(i); + if (status == VisitorStatus::Error || status == VisitorStatus::Ignore) + { + break; + } + } + updateVisitorsForNextRun(); + + if (status != VisitorStatus::Ignore) + { + element = current; + return status; + } + } + } + +private: + bool updateVisitorsForNextRun() + { + while (current_visitor >= 0 && visitors[current_visitor]->isExhausted()) + { + visitors[current_visitor]->reinitialize(); + current_visitor--; + } + if (current_visitor >= 0) + { + visitors[current_visitor]->updateState(); + } + return current_visitor >= 0; + } + + int current_visitor = 0; + ASTPtr query_ptr; + VisitorList<JSONParser> visitors; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator.h new file mode 100644 index 00000000000..323145e07e1 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator.h @@ -0,0 +1,29 @@ +#pragma once + +#include <Functions/JSONPath/Generator/IGenerator_fwd.h> +#include <Functions/JSONPath/Generator/VisitorStatus.h> +#include <Parsers/IAST.h> + +namespace DB +{ + +template <typename JSONParser> +class IGenerator +{ +public: + IGenerator() = default; + + virtual const char * getName() const = 0; + + /** + * Used to yield next non-ignored element describes by JSONPath query. 
+ * + * @param element to be extracted into + * @return true if generator is not exhausted + */ + virtual VisitorStatus getNextItem(typename JSONParser::Element & element) = 0; + + virtual ~IGenerator() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator_fwd.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator_fwd.h new file mode 100644 index 00000000000..bb5f64cd6f9 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/IGenerator_fwd.h @@ -0,0 +1,16 @@ +#pragma once + +#include <Functions/JSONPath/Generator/IVisitor.h> + +namespace DB +{ +template <typename JSONParser> +class IGenerator; + +template <typename JSONParser> +using IVisitorPtr = std::shared_ptr<IVisitor<JSONParser>>; + +template <typename JSONParser> +using VisitorList = std::vector<IVisitorPtr<JSONParser>>; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/IVisitor.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/IVisitor.h new file mode 100644 index 00000000000..1a94106a435 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/IVisitor.h @@ -0,0 +1,46 @@ +#pragma once + +#include <Functions/JSONPath/Generator/VisitorStatus.h> + +namespace DB +{ +template <typename JSONParser> +class IVisitor +{ +public: + virtual const char * getName() const = 0; + + /** + * Applies this visitor to document and mutates its state + * @param element simdjson element + */ + virtual VisitorStatus visit(typename JSONParser::Element & element) = 0; + + /** + * Applies this visitor to document, but does not mutate state + * @param element simdjson element + */ + virtual VisitorStatus apply(typename JSONParser::Element & element) const = 0; + + /** + * Restores visitor's initial state for later use + */ + virtual void reinitialize() = 0; + + virtual void updateState() = 0; + + bool isExhausted() { return is_exhausted; } + + void setExhausted(bool exhausted) { is_exhausted = exhausted; } + + virtual 
~IVisitor() = default; + +private: + /** + * This variable is for detecting whether a visitor's next visit will be able + * to yield a new item. + */ + bool is_exhausted = false; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h new file mode 100644 index 00000000000..8446e1ff3be --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h @@ -0,0 +1,50 @@ +#pragma once + +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include <Functions/JSONPath/Generator/IVisitor.h> +#include <Functions/JSONPath/Generator/VisitorStatus.h> + +namespace DB +{ +template <typename JSONParser> +class VisitorJSONPathMemberAccess : public IVisitor<JSONParser> +{ +public: + explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) + : member_access_ptr(member_access_ptr_->as<ASTJSONPathMemberAccess>()) { } + + const char * getName() const override { return "VisitorJSONPathMemberAccess"; } + + VisitorStatus apply(typename JSONParser::Element & element) const override + { + typename JSONParser::Element result; + element.getObject().find(std::string_view(member_access_ptr->member_name), result); + element = result; + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + this->setExhausted(true); + if (!element.isObject()) + { + return VisitorStatus::Error; + } + typename JSONParser::Element result; + if (!element.getObject().find(std::string_view(member_access_ptr->member_name), result)) + { + return VisitorStatus::Error; + } + apply(element); + return VisitorStatus::Ok; + } + + void reinitialize() override { this->setExhausted(false); } + + void updateState() override { } + +private: + ASTJSONPathMemberAccess * member_access_ptr; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h 
b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h
new file mode 100644
index 00000000000..708a71f7cf4
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
+#include <Functions/JSONPath/Generator/IVisitor.h>
+#include <Functions/JSONPath/Generator/VisitorStatus.h>
+
+namespace DB
+{
+/**
+ * Visitor for a JSONPath array subscript made of explicit indices and ranges, e.g. `$[0, 2, 4 to 9]`.
+ * It steps through ASTJSONPathRange::ranges (pairs <start, end>, end non-inclusive) one index
+ * at a time: visit() yields the element at the current index, updateState() advances to the next one.
+ */
+template <typename JSONParser>
+class VisitorJSONPathRange : public IVisitor<JSONParser>
+{
+public:
+    /// @param range_ptr_ AST node of the subscript. `ranges[0]` is read immediately,
+    ///        so the node is expected to hold at least one range.
+    explicit VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as<ASTJSONPathRange>())
+    {
+        current_range = 0;
+        current_index = range_ptr->ranges[current_range].first;
+    }
+
+    const char * getName() const override { return "VisitorJSONPathRange"; }
+
+    /// Replaces `element` with the array item at the current index; visitor state is not touched.
+    /// Neither the element type nor the index is checked here - visit() does that.
+    VisitorStatus apply(typename JSONParser::Element & element) const override
+    {
+        typename JSONParser::Array array = element.getArray();
+        element = array[current_index];
+        return VisitorStatus::Ok;
+    }
+
+    /**
+     * Yields the item at the current index into `element`.
+     * @return Error if `element` is not an array (the visitor is marked exhausted),
+     *         Ignore if the current index is past the end of the array,
+     *         Ok otherwise.
+     */
+    VisitorStatus visit(typename JSONParser::Element & element) override
+    {
+        if (!element.isArray())
+        {
+            this->setExhausted(true);
+            return VisitorStatus::Error;
+        }
+
+        VisitorStatus status;
+        if (current_index < element.getArray().size())
+        {
+            apply(element);
+            status = VisitorStatus::Ok;
+        }
+        else
+        {
+            status = VisitorStatus::Ignore;
+        }
+
+        /// The last index of the last range has just been handled: no further items can be produced.
+        if (current_index + 1 == range_ptr->ranges[current_range].second
+            && current_range + 1 == range_ptr->ranges.size())
+        {
+            this->setExhausted(true);
+        }
+
+        return status;
+    }
+
+    /// Rewinds to the first index of the first range and clears the exhausted flag.
+    void reinitialize() override
+    {
+        current_range = 0;
+        current_index = range_ptr->ranges[current_range].first;
+        this->setExhausted(false);
+    }
+
+    /**
+     * Advances to the next index, moving on to the next range once the current one is finished.
+     * If there is no next range the position is left unchanged and the visitor is marked
+     * exhausted, instead of indexing `ranges` one past its end.
+     */
+    void updateState() override
+    {
+        const auto & ranges = range_ptr->ranges;
+        if (current_index + 1 < ranges[current_range].second)
+        {
+            ++current_index;
+        }
+        else if (current_range + 1 < ranges.size())
+        {
+            ++current_range;
+            current_index = ranges[current_range].first;
+        }
+        else
+        {
+            this->setExhausted(true);
+        }
+    }
+
+private:
+    ASTJSONPathRange * range_ptr;
+    size_t current_range;   /// position in range_ptr->ranges
+    UInt32 current_index;   /// array index to yield next, within ranges[current_range]
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h new file mode 100644 index 00000000000..71569d3c0a0 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h @@ -0,0 +1,35 @@ +#pragma once + +#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h> +#include <Functions/JSONPath/Generator/IVisitor.h> +#include <Functions/JSONPath/Generator/VisitorStatus.h> + +namespace DB +{ +template <typename JSONParser> +class VisitorJSONPathRoot : public IVisitor<JSONParser> +{ +public: + explicit VisitorJSONPathRoot(ASTPtr) { } + + const char * getName() const override { return "VisitorJSONPathRoot"; } + + VisitorStatus apply(typename JSONParser::Element & /*element*/) const override + { + /// No-op on document, since we are already passed document's root + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + apply(element); + this->setExhausted(true); + return VisitorStatus::Ok; + } + + void reinitialize() override { this->setExhausted(false); } + + void updateState() override { } +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h new file mode 100644 index 00000000000..0c297f64316 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h @@ -0,0 +1,65 @@ +#pragma once + +#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h> +#include <Functions/JSONPath/Generator/IVisitor.h> +#include <Functions/JSONPath/Generator/VisitorStatus.h> + +namespace DB +{ +template <typename JSONParser> +class VisitorJSONPathStar : public IVisitor<JSONParser> +{ +public: + explicit VisitorJSONPathStar(ASTPtr) + { + current_index = 0; + } + + const char * getName() const override { return "VisitorJSONPathStar"; } + + VisitorStatus apply(typename 
JSONParser::Element & element) const override + { + typename JSONParser::Array array = element.getArray(); + element = array[current_index]; + return VisitorStatus::Ok; + } + + VisitorStatus visit(typename JSONParser::Element & element) override + { + if (!element.isArray()) + { + this->setExhausted(true); + return VisitorStatus::Error; + } + + VisitorStatus status; + if (current_index < element.getArray().size()) + { + apply(element); + status = VisitorStatus::Ok; + } + else + { + status = VisitorStatus::Ignore; + this->setExhausted(true); + } + + return status; + } + + void reinitialize() override + { + current_index = 0; + this->setExhausted(false); + } + + void updateState() override + { + current_index++; + } + +private: + UInt32 current_index; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorStatus.h b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorStatus.h new file mode 100644 index 00000000000..96b2ea72f18 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Generator/VisitorStatus.h @@ -0,0 +1,13 @@ +#pragma once + +namespace DB +{ +enum VisitorStatus +{ + Ok, + Exhausted, + Error, + Ignore +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp new file mode 100644 index 00000000000..003e97af38b --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp @@ -0,0 +1,31 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPath.h> +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPath.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> + +namespace DB +{ +/** + * Entry parser for JSONPath + */ +bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto ast_jsonpath = std::make_shared<ASTJSONPath>(); + ParserJSONPathQuery parser_jsonpath_query; + + /// Push back dot AST and brackets 
AST to query->children + ASTPtr query; + + bool res = parser_jsonpath_query.parse(pos, query, expected); + + if (res) + { + /// Set ASTJSONPathQuery of ASTJSONPath + ast_jsonpath->set(ast_jsonpath->jsonpath_query, query); + } + + node = ast_jsonpath; + return res; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h new file mode 100644 index 00000000000..7d2c2ad642c --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h @@ -0,0 +1,21 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +/** + * Entry parser for JSONPath + */ +class ParserJSONPath : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPath"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPath() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp new file mode 100644 index 00000000000..141f25bfe4c --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -0,0 +1,85 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h> + +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTIdentifier_fwd.h> +#include <Parsers/ExpressionElementParsers.h> +#include <Parsers/Lexer.h> +#include <Common/StringUtils/StringUtils.h> + +namespace DB +{ +/** + * + * @param pos token iterator + * @param node node of ASTJSONPathMemberAccess + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // There's a special case, that a path member can begin with number + // some invalid cases as 
following + // - ".123" is parsed as a number, not a dot and a number + // - ".123abc" is parsed as two parts, a number ".123" and a token "abc" + // - ".abc" is parsed as two parts. a dot and a token "abc" + // "$..123abc" is parsed into three parts, ".", ".123" and "abc" + if (pos->type != TokenType::Dot && pos->type != TokenType::Number) + return false; + if (pos->type != TokenType::Number) + { + ++pos; + // Check the case "$..123abc" + if (pos->type == TokenType::Number) + { + return false; + } + } + + ASTPtr member_name; + + if (pos->type == TokenType::Number)[[unlikely]] + { + for (const auto * c = pos->begin; c != pos->end; ++c) + { + if (*c == '.' && c == pos->begin) + continue; + if (!isNumericASCII(*c)) + { + return false; + } + } + const auto * last_begin = *pos->begin == '.' ? pos->begin + 1 : pos->begin; + const auto * last_end = pos->end; + ++pos; + + if (pos.isValid() && pos->type == TokenType::BareWord && pos->begin == last_end) + { + member_name = std::make_shared<ASTIdentifier>(String(last_begin, pos->end)); + ++pos; + } + else if (!pos.isValid() && pos->type == TokenType::EndOfStream) + { + member_name = std::make_shared<ASTIdentifier>(String(last_begin, last_end)); + } + else + { + return false; + } + } + else + { + if (pos->type != TokenType::BareWord && pos->type != TokenType::QuotedIdentifier) + return false; + + ParserIdentifier name_p; + if (!name_p.parse(pos, member_name, expected)) + return false; + } + + auto member_access = std::make_shared<ASTJSONPathMemberAccess>(); + node = member_access; + return tryGetIdentifierNameInto(member_name, member_access->member_name); +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h new file mode 100644 index 00000000000..b28bf37d5ef --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h @@ -0,0 +1,14 @@ +#pragma once + +#include 
<Parsers/IParserBase.h>
+
+namespace DB
+{
+class ParserJSONPathMemberAccess : public IParserBase
+{
+    const char * getName() const override { return "ParserJSONPathMemberAccess"; }
+
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp
new file mode 100644
index 00000000000..93e0639ccfe
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp
@@ -0,0 +1,56 @@
+#include "ParserJSONPathMemberSquareBracketAccess.h"
+#include <memory>
+#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
+#include <IO/ReadBufferFromMemory.h>
+#include <IO/ReadHelpers.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+namespace DB
+{
+/**
+ * Parses a bracketed member access: `[` followed by a bare word, a quoted identifier
+ * or a string literal, followed by `]`.
+ *
+ * @param pos token iterator
+ * @param node receives the ASTJSONPathMemberAccess node
+ * @param expected passed on to ParserIdentifier
+ * @return was parse successful
+ */
+bool ParserJSONPathMemberSquareBracketAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (pos->type != TokenType::OpeningSquareBracket)
+        return false;
+    ++pos;
+
+    ASTPtr key;
+    const bool is_identifier = pos->type == TokenType::BareWord || pos->type == TokenType::QuotedIdentifier;
+    if (is_identifier)
+    {
+        ParserIdentifier identifier_parser;
+        if (!identifier_parser.parse(pos, key, expected))
+            return false;
+    }
+    else if (pos->type == TokenType::StringLiteral)
+    {
+        /// Decode the literal's text with readQuotedStringWithSQLStyle and use it as the member name.
+        String unquoted;
+        ReadBufferFromMemory buffer(pos->begin, pos->size());
+        readQuotedStringWithSQLStyle(unquoted, buffer);
+        key = std::make_shared<ASTIdentifier>(unquoted);
+        ++pos;
+    }
+    else
+    {
+        return false;
+    }
+
+    if (pos->type != TokenType::ClosingSquareBracket)
+        return false;
+    ++pos;
+
+    auto access = std::make_shared<ASTJSONPathMemberAccess>();
+    node = access;
+    return tryGetIdentifierNameInto(key, access->member_name);
+}
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h
b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h new file mode 100644 index 00000000000..b682ec5bb96 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h @@ -0,0 +1,17 @@ +#pragma once +#include <Parsers/IParserBase.h> +// cases +// - [ident] +// - ['ident'] +// - ["ident"] +namespace DB +{ +class ParserJSONPathMemberSquareBracketAccess : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathMemberSquareBracketAccess"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserJSONPathMemberSquareBracketAccess() = default; +}; +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp new file mode 100644 index 00000000000..d8d633a1ec9 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp @@ -0,0 +1,51 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h> + +namespace DB + +{ +/** + * + * @param pos token iterator + * @param query node of ASTJSONPathQuery + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected) +{ + query = std::make_shared<ASTJSONPathQuery>(); + ParserJSONPathMemberAccess parser_jsonpath_member_access; + ParserJSONPathMemberSquareBracketAccess parser_jsonpath_member_square_bracket_access; + ParserJSONPathRange 
parser_jsonpath_range; + ParserJSONPathStar parser_jsonpath_star; + ParserJSONPathRoot parser_jsonpath_root; + + ASTPtr path_root; + if (!parser_jsonpath_root.parse(pos, path_root, expected)) + { + return false; + } + query->children.push_back(path_root); + + ASTPtr accessor; + while (parser_jsonpath_member_access.parse(pos, accessor, expected) + || parser_jsonpath_member_square_bracket_access.parse(pos, accessor, expected) + || parser_jsonpath_range.parse(pos, accessor, expected) + || parser_jsonpath_star.parse(pos, accessor, expected)) + { + if (accessor) + { + query->children.push_back(accessor); + accessor = nullptr; + } + } + /// parsing was successful if we reached the end of query by this point + return pos->type == TokenType::EndOfStream; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h new file mode 100644 index 00000000000..fbe7321562e --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathQuery : public IParserBase +{ +protected: + const char * getName() const override { return "ParserJSONPathQuery"; } + bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override; +}; +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp new file mode 100644 index 00000000000..03c006774c0 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -0,0 +1,94 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h> + +#include <Parsers/ASTIdentifier.h> +#include <Parsers/ASTLiteral.h> +#include <Parsers/ExpressionElementParsers.h> 
+#include <Parsers/CommonParsers.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+/**
+ * Parses a JSONPath array subscript: `[N]`, `[N to M]` or a comma-separated list of those.
+ * Each entry is stored in ASTJSONPathRange::ranges as <start, end (non-inclusive)>.
+ * An empty subscript `[]` yields no ranges and is reported as a failed parse.
+ *
+ * @param pos token iterator
+ * @param node receives the ASTJSONPathRange node
+ * @param expected stuff for logging
+ * @return was parse successful
+ * @throws Exception (BAD_ARGUMENTS) if a range start is not less than its end
+ */
+bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (pos->type != TokenType::OpeningSquareBracket)
+        return false;
+    ++pos;
+
+    /// The node is assigned before validation finishes; it is only meaningful if true is returned.
+    auto range = std::make_shared<ASTJSONPathRange>();
+    node = range;
+
+    ParserNumber number_p;
+    ASTPtr number_ptr;
+    while (pos->type != TokenType::ClosingSquareBracket)
+    {
+        if (pos->type != TokenType::Number)
+            return false;
+
+        std::pair<UInt32, UInt32> range_indices;
+        if (!number_p.parse(pos, number_ptr, expected))
+            return false;
+        range_indices.first = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
+
+        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket)
+        {
+            /// Single index case
+            range_indices.second = range_indices.first + 1;
+        }
+        else if (pos->type == TokenType::BareWord)
+        {
+            /// Range case: `<start> TO <end>`
+            if (!ParserKeyword("TO").ignore(pos, expected))
+                return false;
+            if (!number_p.parse(pos, number_ptr, expected))
+                return false;
+            range_indices.second = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
+        }
+        else
+        {
+            return false;
+        }
+
+        /// A range must contain at least one index.
+        if (range_indices.first >= range_indices.second)
+        {
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Start of range must be less than end of range, however {} >= {}",
+                range_indices.first,
+                range_indices.second);
+        }
+
+        range->ranges.push_back(range_indices);
+
+        /// Entries must be separated by a comma; any other token before `]` is a syntax error
+        /// rather than something to skip over silently.
+        if (pos->type == TokenType::Comma)
+            ++pos;
+        else if (pos->type != TokenType::ClosingSquareBracket)
+            return false;
+    }
+    ++pos;
+
+    /// We can't have both ranges and star present, so parse was successful <=> exactly 1 of these conditions is true
+    /// (is_star is not assigned in this function, so this reduces to "at least one range was parsed").
+    return !range->ranges.empty() ^ range->is_star;
+}
+
+}
diff --git
a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h new file mode 100644 index 00000000000..94db29577ab --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathRange : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRange"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRange() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp new file mode 100644 index 00000000000..86cf793fb52 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp @@ -0,0 +1,27 @@ +#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h> +#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h> + +#include <Parsers/Lexer.h> + +namespace DB +{ +/** + * + * @param pos token iterator + * @param node node of ASTJSONPathRoot + * @param expected stuff for logging + * @return was parse successful + */ +bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::DollarSign) + { + expected.add(pos, "dollar sign (start of jsonpath)"); + return false; + } + node = std::make_shared<ASTJSONPathRoot>(); + ++pos; + return true; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h new file mode 100644 index 00000000000..59fed28d63e --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathRoot : 
public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathRoot"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathRoot() = default; +}; + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp new file mode 100644 index 00000000000..1338a2064f1 --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp @@ -0,0 +1,31 @@ +#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h> + +#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h> + +namespace DB +{ +bool ParserJSONPathStar::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningSquareBracket) + { + return false; + } + ++pos; + if (pos->type != TokenType::Asterisk) + { + return false; + } + ++pos; + if (pos->type != TokenType::ClosingSquareBracket) + { + expected.add(pos, "Closing square bracket"); + return false; + } + ++pos; + + node = std::make_shared<ASTJSONPathStar>(); + + return true; +} + +} diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h new file mode 100644 index 00000000000..543823357de --- /dev/null +++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h @@ -0,0 +1,18 @@ +#pragma once + +#include <Parsers/IParserBase.h> + + +namespace DB +{ +class ParserJSONPathStar : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathStar"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +public: + explicit ParserJSONPathStar() = default; +}; + +} |
