diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-09-20 09:43:12 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-09-20 10:04:04 +0300 |
commit | a9a65c1affe5d4d872e4fdf7852d4bc79d466d21 (patch) | |
tree | 280e86a680cf2bdacc033e1d6f142e15ebfe8083 | |
parent | 0ba56e8b5a443974649ae9f2ee9af833b168cd1b (diff) | |
download | ydb-a9a65c1affe5d4d872e4fdf7852d4bc79d466d21.tar.gz |
YQ Connector: validate NYql::TGenericClusterConfig
9 files changed, 157 insertions, 31 deletions
diff --git a/ydb/core/fq/libs/actors/clusters_from_connections.cpp b/ydb/core/fq/libs/actors/clusters_from_connections.cpp index be5df80c68..20258f30a0 100644 --- a/ydb/core/fq/libs/actors/clusters_from_connections.cpp +++ b/ydb/core/fq/libs/actors/clusters_from_connections.cpp @@ -2,6 +2,7 @@ #include <ydb/library/yql/providers/common/provider/yql_provider_names.h> #include <ydb/library/yql/providers/generic/connector/api/common/data_source.pb.h> +#include <ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.h> #include <ydb/library/yql/utils/url_builder.h> #include <util/generic/hash.h> @@ -89,38 +90,40 @@ void FillSolomonClusterConfig(NYql::TSolomonClusterConfig& clusterConfig, template <typename TConnection> void FillGenericClusterConfig( - NYql::TGenericClusterConfig& clusterCfg, + TGenericClusterConfig& clusterCfg, const TConnection& connection, const TString& connectionName, NConnector::NApi::EDataSourceKind dataSourceKind, const TString& authToken, const THashMap<TString, TString>& accountIdSignatures ){ - clusterCfg.SetKind(dataSourceKind); - clusterCfg.SetName(connectionName); - clusterCfg.SetDatabaseId(connection.database_id()); - clusterCfg.mutable_credentials()->mutable_basic()->set_username(connection.login()); - clusterCfg.mutable_credentials()->mutable_basic()->set_password(connection.password()); - FillClusterAuth(clusterCfg, connection.auth(), authToken, accountIdSignatures); - - // Since resolver always returns secure ports, we'll always ask for secure connections - // between remote Connector and the data source: - // https://a.yandex-team.ru/arcadia/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_host_transformer.cpp#L24 - clusterCfg.SetUseSsl(true); - - // In YQv1 we just hardcode desired protocols here. - // In YQv2 protocol can be configured via `CREATE EXTERNAL DATA SOURCE` params. - switch (dataSourceKind) { - case NYql::NConnector::NApi::CLICKHOUSE: - clusterCfg.SetProtocol(NYql::NConnector::NApi::EProtocol::HTTP); - break; - case NYql::NConnector::NApi::POSTGRESQL: - clusterCfg.SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); - break; - default: - ythrow yexception() << "Unexpected data source kind: '" - << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) << "'"; - } + clusterCfg.SetKind(dataSourceKind); + clusterCfg.SetName(connectionName); + clusterCfg.SetDatabaseId(connection.database_id()); + clusterCfg.mutable_credentials()->mutable_basic()->set_username(connection.login()); + clusterCfg.mutable_credentials()->mutable_basic()->set_password(connection.password()); + FillClusterAuth(clusterCfg, connection.auth(), authToken, accountIdSignatures); + + // Since resolver always returns secure ports, we'll always ask for secure connections + // between remote Connector and the data source: + // https://a.yandex-team.ru/arcadia/ydb/core/fq/libs/db_id_async_resolver_impl/mdb_host_transformer.cpp#L24 + clusterCfg.SetUseSsl(true); + + // In YQv1 we just hardcode desired protocols here. + // In YQv2 protocol can be configured via `CREATE EXTERNAL DATA SOURCE` params. + switch (dataSourceKind) { + case NYql::NConnector::NApi::CLICKHOUSE: + clusterCfg.SetProtocol(NYql::NConnector::NApi::EProtocol::HTTP); + break; + case NYql::NConnector::NApi::POSTGRESQL: + clusterCfg.SetProtocol(NYql::NConnector::NApi::EProtocol::NATIVE); + break; + default: + ythrow yexception() << "Unexpected data source kind: '" + << NYql::NConnector::NApi::EDataSourceKind_Name(dataSourceKind) << "'"; + } + + ValidateGenericClusterConfig(clusterCfg, "NFq::FillGenericClusterFromConfig"); } } //namespace diff --git a/ydb/library/yql/providers/generic/provider/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/generic/provider/CMakeLists.darwin-x86_64.txt index ec376a93c8..971f4dac73 100644 --- a/ydb/library/yql/providers/generic/provider/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/providers/generic/provider/CMakeLists.darwin-x86_64.txt @@ -14,6 +14,7 @@ target_compile_options(providers-generic-provider PRIVATE target_link_libraries(providers-generic-provider PUBLIC contrib-libs-cxxsupp yutil + contrib-libs-fmt library-cpp-json library-cpp-random_provider library-cpp-time_provider diff --git a/ydb/library/yql/providers/generic/provider/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/generic/provider/CMakeLists.linux-aarch64.txt index 581d5aae68..7e3022b156 100644 --- a/ydb/library/yql/providers/generic/provider/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/providers/generic/provider/CMakeLists.linux-aarch64.txt @@ -15,6 +15,7 @@ target_link_libraries(providers-generic-provider PUBLIC contrib-libs-linux-headers contrib-libs-cxxsupp yutil + contrib-libs-fmt library-cpp-json library-cpp-random_provider library-cpp-time_provider diff --git a/ydb/library/yql/providers/generic/provider/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/generic/provider/CMakeLists.linux-x86_64.txt index 581d5aae68..7e3022b156 100644 --- a/ydb/library/yql/providers/generic/provider/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/providers/generic/provider/CMakeLists.linux-x86_64.txt @@ -15,6 +15,7 @@ target_link_libraries(providers-generic-provider PUBLIC contrib-libs-linux-headers contrib-libs-cxxsupp yutil + contrib-libs-fmt library-cpp-json library-cpp-random_provider library-cpp-time_provider diff --git a/ydb/library/yql/providers/generic/provider/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/generic/provider/CMakeLists.windows-x86_64.txt index ec376a93c8..971f4dac73 100644 --- a/ydb/library/yql/providers/generic/provider/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/providers/generic/provider/CMakeLists.windows-x86_64.txt @@ -14,6 +14,7 @@ target_compile_options(providers-generic-provider PRIVATE target_link_libraries(providers-generic-provider PUBLIC contrib-libs-cxxsupp yutil + contrib-libs-fmt library-cpp-json library-cpp-random_provider library-cpp-time_provider diff --git a/ydb/library/yql/providers/generic/provider/ya.make b/ydb/library/yql/providers/generic/provider/ya.make index c745e58e77..06a700d75d 100644 --- a/ydb/library/yql/providers/generic/provider/ya.make +++ b/ydb/library/yql/providers/generic/provider/ya.make @@ -25,6 +25,7 @@ SRCS( YQL_LAST_ABI_VERSION() PEERDIR( + contrib/libs/fmt library/cpp/json library/cpp/random_provider library/cpp/time_provider diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp index 0f55422baf..ae09598204 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.cpp @@ -1,16 +1,19 @@ -#include "yql_generic_cluster_config.h" - -#include <ydb/library/yql/providers/generic/connector/api/common/data_source.pb.h> -#include <ydb/library/yql/providers/generic/connector/libcpp/external_data_source.h> +#include <fmt/format.h> #include <util/generic/serialized_enum.h> #include <util/string/builder.h> #include <util/string/cast.h> #include <util/generic/yexception.h> +#include <ydb/library/yql/providers/generic/connector/api/common/data_source.pb.h> +#include <ydb/library/yql/providers/generic/connector/libcpp/external_data_source.h> + +#include "yql_generic_cluster_config.h" + namespace NYql { using namespace NConnector; using namespace NConnector::NApi; + using namespace fmt::literals; void ParseLogin( const THashMap<TString, TString>& properties, @@ -234,7 +237,7 @@ namespace NYql { return !iter->second.Empty(); } - NYql::TGenericClusterConfig GenericClusterConfigFromProperties(const TString& clusterName, const THashMap<TString, TString>& properties) { + TGenericClusterConfig GenericClusterConfigFromProperties(const TString& clusterName, const THashMap<TString, TString>& properties) { // some cross-parameter validations auto location = KeyIsSet(properties, "location"); auto mdbClusterId = KeyIsSet(properties, "mdb_cluster_id"); @@ -264,4 +267,111 @@ namespace NYql { return clusterConfig; } + + void ValidationError(const NYql::TGenericClusterConfig& clusterConfig, + const TString& context, + const TString& msg) { + ythrow yexception() << fmt::format( + R"( + {context}: invalid cluster config: {msg}. + + Full config dump: + Name={name}, + Kind={kind}, + Location.Endpoint.host={host}, + Location.Endpoint.port={port}, + Location.DatabaseId={database_id}, + Credentials.basic.username={username}, + Credentials.basic.password=[{password} char(s)], + ServiceAccountId={service_account_id}, + ServiceAccountIdSignature=[{service_account_id_signature} char(s)], + Token=[{token} char(s)] + UseSsl={use_ssl}, + DatabaseName={database_name}, + Protocol={protocol} + )", + "context"_a = context, + "msg"_a = msg, + "name"_a = clusterConfig.GetName(), + "kind"_a = NConnector::NApi::EDataSourceKind_Name(clusterConfig.GetKind()), + "host"_a = clusterConfig.GetEndpoint().Gethost(), + "port"_a = clusterConfig.GetEndpoint().Getport(), + "database_id"_a = clusterConfig.GetDatabaseId(), + "username"_a = clusterConfig.GetCredentials().Getbasic().Getusername(), + "password"_a = ToString(clusterConfig.GetCredentials().Getbasic().Getpassword().size()), + "service_account_id"_a = clusterConfig.GetServiceAccountId(), + "service_account_id_signature"_a = ToString(clusterConfig.GetServiceAccountIdSignature().size()), + "token"_a = ToString(clusterConfig.GetToken().size()), + "use_ssl"_a = clusterConfig.GetUseSsl() ? "TRUE" : "FALSE", + "database_name"_a = clusterConfig.GetDatabaseName(), + "protocol"_a = NConnector::NApi::EProtocol_Name(clusterConfig.GetProtocol())); + } + + void ValidateGenericClusterConfig( + const NYql::TGenericClusterConfig& clusterConfig, + const TString& context) { + // cross-parameter validations for optional fields + auto hasEndpoint = clusterConfig.HasEndpoint(); + auto databaseId = clusterConfig.GetDatabaseId(); + + if ((hasEndpoint && databaseId)) { + return ValidationError( + clusterConfig, + context, + "both 'Endpoint' and 'DatabaseId' fields are set; you must set only one of them"); + } + + if (!hasEndpoint and !databaseId) { + return ValidationError( + clusterConfig, + context, + "none of 'Endpoint' and 'DatabaseId' fields are set; you must set one of them"); + } + + auto serviceAccountId = clusterConfig.GetServiceAccountId(); + auto serviceAccountIdSignature = clusterConfig.GetServiceAccountIdSignature(); + if (serviceAccountId && !serviceAccountIdSignature) { + return ValidationError( + clusterConfig, + context, + "'ServiceAccountId' field is set, but 'ServiceAccountIdSignature' field is not set; " + "you must set either both 'ServiceAccountId' and 'ServiceAccountIdSignature' fields or none of them"); + } + + if (!serviceAccountId && serviceAccountIdSignature) { + return ValidationError( + clusterConfig, + context, + "'ServiceAccountIdSignature' field is set, but 'ServiceAccountId' field is not set; " + "you must set either both 'ServiceAccountId' and 'ServiceAccountIdSignature' fields or none of them"); + } + + auto token = clusterConfig.GetToken(); + if ((serviceAccountId && serviceAccountIdSignature) && token) { + return ValidationError( + clusterConfig, + context, + "you must set either ('ServiceAccountId', 'ServiceAccountIdSignature') fields or 'Token' field or none of them"); + } + + // check required fields + if (!clusterConfig.GetName()) { + return ValidationError(clusterConfig, context, "empty field 'Name'"); + } + + if (clusterConfig.GetKind() == EDataSourceKind::DATA_SOURCE_KIND_UNSPECIFIED) { + return ValidationError(clusterConfig, context, "empty field 'Kind'"); + } + + if (!clusterConfig.GetCredentials().Getbasic().Getusername()) { + return ValidationError(clusterConfig, context, "empty field 'Credentials.basic.username'"); + } + + // TODO: validate Credentials.basic.password after ClickHouse recipe fix + // TODO: validate DatabaseName field when it is supported on frontend + + if (clusterConfig.GetProtocol() == EProtocol::PROTOCOL_UNSPECIFIED) { + return ValidationError(clusterConfig, context, "empty field 'Protocol'"); + } + } } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.h b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.h index fecf5ced7b..33759698c3 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.h +++ b/ydb/library/yql/providers/generic/provider/yql_generic_cluster_config.h @@ -4,7 +4,12 @@ #include <util/generic/hash.h> namespace NYql { + // Converts properties from `CREATE EXTERNAL DATA SOURCE .. WITH ...` into + // unified representation of a cluster config NYql::TGenericClusterConfig GenericClusterConfigFromProperties( const TString& clusterName, const THashMap<TString, TString>& properties); + + // Validates cluster config. Throws exception in case of error. + void ValidateGenericClusterConfig(const NYql::TGenericClusterConfig& clusterConfig, const TString& context); } diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp index f84c5bc399..6afb57a2ef 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_settings.cpp @@ -1,3 +1,4 @@ +#include "yql_generic_cluster_config.h" #include "yql_generic_settings.h" #include <ydb/library/yql/providers/common/structured_token/yql_token_builder.h> @@ -23,6 +24,8 @@ namespace NYql { const std::shared_ptr<NYql::IDatabaseAsyncResolver> databaseResolver, NYql::IDatabaseAsyncResolver::TDatabaseAuthMap& databaseAuth, const TCredentials::TPtr& credentials) { + ValidateGenericClusterConfig(clusterConfig, "TGenericConfiguration::AddCluster"); + const auto& clusterName = clusterConfig.GetName(); const auto& databaseId = clusterConfig.GetDatabaseId(); const auto& endpoint = clusterConfig.GetEndpoint(); |