summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Functions/JSONPath/Parsers
diff options
context:
space:
mode:
authorvitalyisaev <[email protected]>2023-11-14 09:58:56 +0300
committervitalyisaev <[email protected]>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Functions/JSONPath/Parsers
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Functions/JSONPath/Parsers')
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp31
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h21
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp85
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h14
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp44
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h17
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp51
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h14
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp94
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h18
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp27
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h18
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp31
-rw-r--r--contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h18
14 files changed, 483 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp
new file mode 100644
index 00000000000..003e97af38b
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.cpp
@@ -0,0 +1,31 @@
+#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
+#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
+
+namespace DB
+{
+/**
+ * Top-level JSONPath parser: wraps the parsed query in an ASTJSONPath node.
+ */
+bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// Receives the AST produced by ParserJSONPathQuery
+    ASTPtr parsed_query;
+    ParserJSONPathQuery query_parser;
+    const bool parsed = query_parser.parse(pos, parsed_query, expected);
+
+    auto jsonpath = std::make_shared<ASTJSONPath>();
+    if (parsed)
+    {
+        /// Store the query in the node's jsonpath_query member via set()
+        jsonpath->set(jsonpath->jsonpath_query, parsed_query);
+    }
+
+    /// node is assigned even when parsing failed; the returned flag is what
+    /// reports success to the caller.
+    node = jsonpath;
+    return parsed;
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h
new file mode 100644
index 00000000000..7d2c2ad642c
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPath.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/**
+ * Parser for a complete JSONPath expression; the entry point of the JSONPath parsers.
+ */
+class ParserJSONPath : public IParserBase
+{
+public:
+    explicit ParserJSONPath() = default;
+
+private:
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    const char * getName() const override { return "ParserJSONPath"; }
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp
new file mode 100644
index 00000000000..141f25bfe4c
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp
@@ -0,0 +1,85 @@
+#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
+
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTIdentifier_fwd.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/Lexer.h>
+#include <Common/StringUtils/StringUtils.h>
+
+namespace DB
+{
+/**
+ * Parses one member access of a JSONPath: a dot followed by a member name,
+ * or a Number token (optionally starting with a dot) made only of digits.
+ *
+ * @param pos token iterator
+ * @param node receives the ASTJSONPathMemberAccess
+ * @param expected stuff for logging
+ * @return was parse successful
+ */
+bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// A member name may start with digits, and the lexer reports those as a Number:
+    /// - ".123" is a single Number token, not a dot followed by a number
+    /// - ".123abc" is two tokens: the Number ".123" and the word "abc"
+    /// - ".abc" is two tokens: a dot and the word "abc"
+    /// - "$..123abc" continues after "$" with ".", ".123" and "abc"; it is rejected below
+    const bool starts_with_number = pos->type == TokenType::Number;
+    if (!starts_with_number)
+    {
+        if (pos->type != TokenType::Dot)
+            return false;
+        ++pos;
+        /// A Number directly after a dot is the "$..123abc" case
+        if (pos->type == TokenType::Number)
+            return false;
+    }
+
+    ASTPtr name_ast;
+
+    if (starts_with_number) [[unlikely]]
+    {
+        /// Apart from an optional leading dot, every character must be a digit
+        for (const auto * ch = pos->begin; ch != pos->end; ++ch)
+        {
+            const bool is_leading_dot = ch == pos->begin && *ch == '.';
+            if (!is_leading_dot && !isNumericASCII(*ch))
+                return false;
+        }
+        const auto * number_end = pos->end;
+        /// The member name itself does not include the leading dot
+        const auto * name_begin = *pos->begin == '.' ? pos->begin + 1 : pos->begin;
+        ++pos;
+
+        /// Either a bare word extends the name, or the stream has to end here.
+        /// pos->begin == number_end: the word starts exactly where the number token ended.
+        const bool word_follows = pos.isValid() && pos->type == TokenType::BareWord && pos->begin == number_end;
+        const bool stream_ended = !pos.isValid() && pos->type == TokenType::EndOfStream;
+        if (!word_follows && !stream_ended)
+            return false;
+
+        const auto * name_end = word_follows ? pos->end : number_end;
+        name_ast = std::make_shared<ASTIdentifier>(String(name_begin, name_end));
+        if (word_follows)
+            ++pos;
+    }
+    else
+    {
+        /// Ordinary case: the name is a bare word or a quoted identifier
+        const bool is_name_token = pos->type == TokenType::BareWord || pos->type == TokenType::QuotedIdentifier;
+        if (!is_name_token)
+            return false;
+
+        ParserIdentifier identifier_parser;
+        if (!identifier_parser.parse(pos, name_ast, expected))
+            return false;
+    }
+
+    /// node is assigned first; the outcome of tryGetIdentifierNameInto is the result
+    auto member_access = std::make_shared<ASTJSONPathMemberAccess>();
+    node = member_access;
+    return tryGetIdentifierNameInto(name_ast, member_access->member_name);
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h
new file mode 100644
index 00000000000..b28bf37d5ef
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+namespace DB
+{
+/// Parser for a dot-style member access inside a JSONPath; all members are private.
+class ParserJSONPathMemberAccess : public IParserBase
+{
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    const char * getName() const override { return "ParserJSONPathMemberAccess"; }
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp
new file mode 100644
index 00000000000..93e0639ccfe
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp
@@ -0,0 +1,44 @@
+#include "ParserJSONPathMemberSquareBracketAccess.h"
+#include <memory>
+#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
+#include <IO/ReadBufferFromMemory.h>
+#include <IO/ReadHelpers.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ExpressionElementParsers.h>
+
+namespace DB
+{
+bool ParserJSONPathMemberSquareBracketAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// Accepts '[' name ']' where name is a bare word, a quoted identifier or a string literal
+    if (pos->type != TokenType::OpeningSquareBracket)
+        return false;
+    ++pos;
+    ASTPtr name_ast;
+    if (pos->type == TokenType::StringLiteral)
+    {
+        /// Read the quoted literal into a plain string and wrap it in an identifier
+        String literal_value;
+        ReadBufferFromMemory literal_buf(pos->begin, pos->size());
+        readQuotedStringWithSQLStyle(literal_value, literal_buf);
+        name_ast = std::make_shared<ASTIdentifier>(literal_value);
+        ++pos;
+    }
+    else if (pos->type == TokenType::BareWord || pos->type == TokenType::QuotedIdentifier)
+    {
+        ParserIdentifier identifier_parser;
+        if (!identifier_parser.parse(pos, name_ast, expected))
+            return false;
+    }
+    else
+        return false;
+
+    if (pos->type != TokenType::ClosingSquareBracket)
+        return false;
+    ++pos;
+
+    auto member_access = std::make_shared<ASTJSONPathMemberAccess>();
+    node = member_access;
+    return tryGetIdentifierNameInto(name_ast, member_access->member_name);
+}
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h
new file mode 100644
index 00000000000..b682ec5bb96
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h
@@ -0,0 +1,17 @@
+#pragma once
+#include <Parsers/IParserBase.h>
+// Supported forms of a square-bracket member access:
+// - [ident]
+// - ['ident']
+// - ["ident"]
+namespace DB
+{
+class ParserJSONPathMemberSquareBracketAccess : public IParserBase
+{
+public:
+    explicit ParserJSONPathMemberSquareBracketAccess() = default;
+private:
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; // defined in the .cpp
+    const char * getName() const override { return "ParserJSONPathMemberSquareBracketAccess"; }
+};
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp
new file mode 100644
index 00000000000..d8d633a1ec9
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp
@@ -0,0 +1,51 @@
+#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h>
+
+namespace DB
+
+{
+/**
+ * Parses a whole JSONPath query: the root followed by any number of accessors.
+ * @param pos token iterator
+ * @param query receives the ASTJSONPathQuery; the parsed parts become its children
+ * @param expected stuff for logging
+ * @return was parse successful
+ */
+bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected)
+{
+    query = std::make_shared<ASTJSONPathQuery>();
+
+    /// The query has to start with the root
+    ParserJSONPathRoot root_parser;
+    ASTPtr root;
+    if (!root_parser.parse(pos, root, expected))
+        return false;
+    query->children.push_back(root);
+
+    ParserJSONPathMemberAccess member_access_parser;
+    ParserJSONPathMemberSquareBracketAccess square_bracket_access_parser;
+    ParserJSONPathRange range_parser;
+    ParserJSONPathStar star_parser;
+    /// Try the accessor parsers in a fixed order; stop as soon as none of them matches
+    ASTPtr accessor;
+    while (member_access_parser.parse(pos, accessor, expected)
+           || square_bracket_access_parser.parse(pos, accessor, expected)
+           || range_parser.parse(pos, accessor, expected)
+           || star_parser.parse(pos, accessor, expected))
+    {
+        if (!accessor)
+            continue;
+        query->children.push_back(accessor);
+        accessor.reset();
+    }
+
+    /// The parse succeeds only if the end of the token stream has been reached
+    return pos->type == TokenType::EndOfStream;
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h
new file mode 100644
index 00000000000..fbe7321562e
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+class ParserJSONPathQuery : public IParserBase
+{
+protected:
+    bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override; // defined in the .cpp
+    const char * getName() const override { return "ParserJSONPathQuery"; }
+};
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp
new file mode 100644
index 00000000000..03c006774c0
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp
@@ -0,0 +1,94 @@
+#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
+
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/CommonParsers.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+}
+/**
+ * Parses a bracketed list of indices and index ranges, e.g. [0], [1 TO 3] or [0, 2 TO 4].
+ * @param pos token iterator
+ * @param node receives the ASTJSONPathRange
+ * @param expected stuff for logging
+ * @return was parse successful
+ */
+bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// A range accessor starts with '['
+    if (pos->type != TokenType::OpeningSquareBracket)
+    {
+        return false;
+    }
+    ++pos;
+
+    auto range = std::make_shared<ASTJSONPathRange>();
+    node = range;
+
+    ParserNumber number_p;
+    ASTPtr number_ptr;
+    while (pos->type != TokenType::ClosingSquareBracket)
+    {
+        if (pos->type != TokenType::Number)
+        {
+            return false;
+        }
+        /// Each element begins with a number: a single index, or the start of a range
+        std::pair<UInt32, UInt32> range_indices;
+        if (!number_p.parse(pos, number_ptr, expected))
+        {
+            return false;
+        }
+        range_indices.first = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
+        /// NOTE(review): get<UInt32>() presumably expects an unsigned integer literal - confirm for other numbers
+        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket)
+        {
+            /// Single index case: the end is set to index + 1
+            range_indices.second = range_indices.first + 1;
+        }
+        else if (pos->type == TokenType::BareWord)
+        {
+            if (!ParserKeyword("TO").ignore(pos, expected))
+            {
+                return false;
+            }
+            if (!number_p.parse(pos, number_ptr, expected))
+            {
+                return false;
+            }
+            range_indices.second = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
+        }
+        else
+        {
+            return false;
+        }
+        /// Reject empty or reversed ranges: the start must be strictly less than the end
+        if (range_indices.first >= range_indices.second)
+        {
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Start of range must be less than end of range, however {} >= {}",
+                range_indices.first,
+                range_indices.second);
+        }
+        /// Store the pair, then step over the token that follows it unless that token is ']'
+        range->ranges.push_back(std::move(range_indices));
+        if (pos->type != TokenType::ClosingSquareBracket)
+        {
+            ++pos;
+        }
+    }
+    ++pos;
+
+    /// Ranges and star are mutually exclusive: succeed iff exactly one is present (is_star is not set here)
+    return !range->ranges.empty() ^ range->is_star;
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h
new file mode 100644
index 00000000000..94db29577ab
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRange.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/// Parser for the bracketed index / index-range accessor of a JSONPath.
+class ParserJSONPathRange : public IParserBase
+{
+public:
+    explicit ParserJSONPathRange() = default;
+private:
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    const char * getName() const override { return "ParserJSONPathRange"; }
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp
new file mode 100644
index 00000000000..86cf793fb52
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.cpp
@@ -0,0 +1,27 @@
+#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h>
+#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
+
+#include <Parsers/Lexer.h>
+
+namespace DB
+{
+/**
+ * Parses the root of a JSONPath, a single dollar sign; a mismatch is recorded in `expected`.
+ * @param pos token iterator
+ * @param node receives the ASTJSONPathRoot
+ * @param expected stuff for logging
+ * @return was parse successful
+ */
+bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (pos->type == TokenType::DollarSign)
+    {
+        node = std::make_shared<ASTJSONPathRoot>();
+        ++pos;
+        return true;
+    }
+    expected.add(pos, "dollar sign (start of jsonpath)");
+    return false;
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h
new file mode 100644
index 00000000000..59fed28d63e
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathRoot.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/// Parser for the root element of a JSONPath.
+class ParserJSONPathRoot : public IParserBase
+{
+public:
+    explicit ParserJSONPathRoot() = default;
+private:
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    const char * getName() const override { return "ParserJSONPathRoot"; }
+};
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp
new file mode 100644
index 00000000000..1338a2064f1
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.cpp
@@ -0,0 +1,31 @@
+#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h>
+
+#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h>
+
+namespace DB
+{
+/// Parses the wildcard accessor: '[', '*' and ']' as three consecutive tokens.
+bool ParserJSONPathStar::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (pos->type != TokenType::OpeningSquareBracket)
+        return false;
+    ++pos;
+
+    if (pos->type != TokenType::Asterisk)
+        return false;
+    ++pos;
+
+    /// Only a missing ']' is reported through `expected`
+    const bool closed = pos->type == TokenType::ClosingSquareBracket;
+    if (!closed)
+    {
+        expected.add(pos, "Closing square bracket");
+        return false;
+    }
+    ++pos;
+
+    node = std::make_shared<ASTJSONPathStar>();
+    return true;
+}
+
+}
diff --git a/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h
new file mode 100644
index 00000000000..543823357de
--- /dev/null
+++ b/contrib/clickhouse/src/Functions/JSONPath/Parsers/ParserJSONPathStar.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+/// Parser for the [*] accessor of a JSONPath.
+class ParserJSONPathStar : public IParserBase
+{
+public:
+    explicit ParserJSONPathStar() = default;
+private:
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    const char * getName() const override { return "ParserJSONPathStar"; }
+};
+
+}