diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Interpreters/TreeRewriter.h | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Interpreters/TreeRewriter.h')
-rw-r--r-- | contrib/clickhouse/src/Interpreters/TreeRewriter.h | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Interpreters/TreeRewriter.h b/contrib/clickhouse/src/Interpreters/TreeRewriter.h new file mode 100644 index 0000000000..89cc13da29 --- /dev/null +++ b/contrib/clickhouse/src/Interpreters/TreeRewriter.h @@ -0,0 +1,137 @@ +#pragma once + +#include <Core/Block.h> +#include <Core/NamesAndTypes.h> +#include <Interpreters/Aliases.h> +#include <Interpreters/Context_fwd.h> +#include <Interpreters/DatabaseAndTableWithAlias.h> +#include <Interpreters/SelectQueryOptions.h> +#include <Storages/IStorage_fwd.h> + +namespace DB +{ + +class ASTFunction; +struct ASTTablesInSelectQueryElement; +class TableJoin; +struct Settings; +struct SelectQueryOptions; +using Scalars = std::map<String, Block>; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; +struct StorageSnapshot; +using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>; + +struct TreeRewriterResult +{ + ConstStoragePtr storage; + StorageSnapshotPtr storage_snapshot; + std::shared_ptr<TableJoin> analyzed_join; + const ASTTablesInSelectQueryElement * ast_join = nullptr; + + NamesAndTypesList source_columns; + NameSet source_columns_set; /// Set of names of source_columns. + /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns. + NamesAndTypesList required_source_columns; + /// Same as above, but also records the alias columns that were expanded. Used for the RBAC access check. + Names required_source_columns_before_expanding_alias_columns; + + /// Set of alias columns that are expanded to their alias expressions. We still need the original columns to check access permission. + NameSet expanded_aliases; + + Aliases aliases; + + ASTs aggregates; + ASTs window_function_asts; + ASTs expressions_with_window_function; + + /// Which source column must be ARRAY JOIN'ed to obtain the specified result column. + /// For example, `SELECT s.v ... ARRAY JOIN a AS s` produces the mapping "s.v" -> "a.v". 
+ NameToNameMap array_join_result_to_source; + + /// For the ARRAY JOIN section, mapping from the alias to the full column name. + /// For example, `ARRAY JOIN [1,2] AS b` adds the entry "b" -> "array(1,2)". + /// Note: not used further. + NameToNameMap array_join_alias_to_name; + + /// The reverse mapping of array_join_alias_to_name. + /// Note: not used further. + NameToNameMap array_join_name_to_alias; + + /// Predicate optimizer overrides the subqueries + bool rewrite_subqueries = false; + + /// Whether the query contains explicit columns like "SELECT column1 + column2 FROM table1". + /// Queries like "SELECT count() FROM table1", "SELECT 1" don't contain explicit columns. + bool has_explicit_columns = false; + + /// Whether it's possible to use the trivial count optimization, + /// i.e. use a fast call of IStorage::totalRows() (or IStorage::totalRowsByPartitionPredicate()) + /// instead of actually retrieving columns and counting rows. + bool optimize_trivial_count = false; + + /// Cached result of the storage's isRemote() call, because the call may be too heavy. 
+ bool is_remote_storage = false; + + /// Rewrite _shard_num to shardNum() + bool has_virtual_shard_num = false; + + /// Results of scalar subqueries + Scalars scalars; + Scalars local_scalars; + + explicit TreeRewriterResult( + const NamesAndTypesList & source_columns_, + ConstStoragePtr storage_ = {}, + const StorageSnapshotPtr & storage_snapshot_ = {}, + bool add_special = true); + + void collectSourceColumns(bool add_special); + void collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint); + /// Returns the names of required_source_columns (by value). + Names requiredSourceColumns() const { return required_source_columns.getNames(); } + /// Returns required_source_columns_before_expanding_alias_columns by const reference. + const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; } + NameSet getArrayJoinSourceNameSet() const; + const Scalars & getScalars() const { return scalars; } +}; + +/// Shared pointer to a const TreeRewriterResult. +using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>; + +/// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf +/// +/// Optimizes the AST and collects information for further expression analysis in ExpressionAnalyzer. +/// The resulting AST has the following invariants: +/// * all aliases are substituted +/// * qualified names are translated +/// * scalar subqueries are executed and replaced with constants +/// * unneeded columns are removed from SELECT clause +/// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...). 
+class TreeRewriter : WithContext +{ +public: + explicit TreeRewriter(ContextPtr context_) : WithContext(context_) {} + + /// Analyze and rewrite a non-SELECT query. The query AST is passed by non-const reference. + TreeRewriterResultPtr analyze( + ASTPtr & query, + const NamesAndTypesList & source_columns_, + ConstStoragePtr storage = {}, + const StorageSnapshotPtr & storage_snapshot = {}, + bool allow_aggregations = false, + bool allow_self_aliases = true, + bool execute_scalar_subqueries = true, + bool is_create_parameterized_view = false) const; + + /// Analyze and rewrite a SELECT query. `result` is taken by rvalue reference. + TreeRewriterResultPtr analyzeSelect( + ASTPtr & query, + TreeRewriterResult && result, + const SelectQueryOptions & select_options = {}, + const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {}, + const Names & required_result_columns = {}, + std::shared_ptr<TableJoin> table_join = {}) const; + +private: + static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); +}; + +} |