diff options
author | Egor Zudin <e-zudin@ydb.tech> | 2024-04-18 17:24:32 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 17:24:32 +0300 |
commit | 75936ddf8c6c3a04a3833fbb170c606f15c15caf (patch) | |
tree | efd3de4c88f9f04755af85f2319c54f245ba351d | |
parent | 8a724ac89bd3299f446498361a244a6ff961030a (diff) | |
download | ydb-75936ddf8c6c3a04a3833fbb170c606f15c15caf.tar.gz |
YQ-3013: add retry for CURLE_GOT_NOTHING for FQ (#3565)
5 files changed, 65 insertions, 27 deletions
diff --git a/ydb/core/fq/libs/init/init.cpp b/ydb/core/fq/libs/init/init.cpp index 87fe099ae6..908a0f233b 100644 --- a/ydb/core/fq/libs/init/init.cpp +++ b/ydb/core/fq/libs/init/init.cpp @@ -195,7 +195,7 @@ void Init( if (protoConfig.GetPrivateApi().GetEnabled()) { const auto& s3readConfig = protoConfig.GetReadActorsFactoryConfig().GetS3ReadActorFactoryConfig(); - auto s3HttpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(TDuration::Max()); + auto s3HttpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.MaxTime = TDuration::Max(), .RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); NYql::NDq::TS3ReadActorFactoryConfig readActorFactoryCfg; if (const ui64 rowsInBatch = s3readConfig.GetRowsInBatch()) { readActorFactoryCfg.RowsInBatch = rowsInBatch; diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp index d8b9e14d4e..9ec068e045 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp @@ -77,8 +77,9 @@ NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( RegisterSequencerActorFactory(*factory, counters); if (federatedQuerySetup) { - RegisterS3ReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway); - RegisterS3WriteActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway); + auto s3HttpRetryPolicy = NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.RetriedCurlCodes = NYql::FqRetriedCurlCodes()}); + RegisterS3ReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway, s3HttpRetryPolicy); + RegisterS3WriteActorFactory(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->HttpGateway, s3HttpRetryPolicy); if (federatedQuerySetup->ConnectorClient) { RegisterGenericProviderFactories(*factory, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->ConnectorClient); diff --git a/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.cpp b/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.cpp index eb94f33826..994ed88d5a 100644 --- a/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.cpp +++ b/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.cpp @@ -2,31 +2,50 @@ namespace NYql { -IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(TDuration maxTime, size_t maxRetries) { +std::unordered_set<CURLcode> YqlRetriedCurlCodes() { + return { + CURLE_COULDNT_CONNECT, + CURLE_WEIRD_SERVER_REPLY, + CURLE_WRITE_ERROR, + CURLE_READ_ERROR, + CURLE_OPERATION_TIMEDOUT, + CURLE_SSL_CONNECT_ERROR, + CURLE_BAD_DOWNLOAD_RESUME, + CURLE_SEND_ERROR, + CURLE_RECV_ERROR, + CURLE_NO_CONNECTION_AVAILABLE + }; +} + +std::unordered_set<CURLcode> FqRetriedCurlCodes() { + return { + CURLE_COULDNT_CONNECT, + CURLE_WEIRD_SERVER_REPLY, + CURLE_WRITE_ERROR, + CURLE_READ_ERROR, + CURLE_OPERATION_TIMEDOUT, + CURLE_SSL_CONNECT_ERROR, + CURLE_BAD_DOWNLOAD_RESUME, + CURLE_SEND_ERROR, + CURLE_RECV_ERROR, + CURLE_NO_CONNECTION_AVAILABLE, + CURLE_GOT_NOTHING + }; +} + +IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(THttpRetryPolicyOptions&& options) { + auto maxTime = options.MaxTime; + auto maxRetries = options.MaxRetries; if (!maxTime) { maxTime = TDuration::Minutes(5); } - return IHTTPGateway::TRetryPolicy::GetExponentialBackoffPolicy([](CURLcode curlCode, long httpCode) { - - switch (curlCode) { - case CURLE_OK: - // look to http code - break; - case CURLE_COULDNT_CONNECT: - case CURLE_WEIRD_SERVER_REPLY: - case CURLE_WRITE_ERROR: - case CURLE_READ_ERROR: - case CURLE_OPERATION_TIMEDOUT: - case CURLE_SSL_CONNECT_ERROR: - case CURLE_BAD_DOWNLOAD_RESUME: - case CURLE_SEND_ERROR: - case CURLE_RECV_ERROR: - case CURLE_NO_CONNECTION_AVAILABLE: - // retry small number of known errors - return ERetryErrorClass::ShortRetry; - default: - // do not retry others - return ERetryErrorClass::NoRetry; + return IHTTPGateway::TRetryPolicy::GetExponentialBackoffPolicy([options = std::move(options)](CURLcode curlCode, long httpCode) { + if (curlCode == CURLE_OK) { + // pass + } else if (options.RetriedCurlCodes.contains(curlCode)) { + return ERetryErrorClass::ShortRetry; + } else { + return ERetryErrorClass::NoRetry; } switch (httpCode) { @@ -52,4 +71,8 @@ IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(TDuration maxTime, si maxTime); // maxTime } +IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(TDuration maxTime, size_t maxRetries) { + return GetHTTPDefaultRetryPolicy(THttpRetryPolicyOptions{.MaxTime = maxTime, .MaxRetries = maxRetries}); +} + } diff --git a/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.h b/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.h index 74169c215a..6c7d323932 100644 --- a/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.h +++ b/ydb/library/yql/providers/common/http_gateway/yql_http_default_retry_policy.h @@ -2,8 +2,22 @@ #include "yql_http_gateway.h" +#include <curl/curl.h> +#include <unordered_set> + namespace NYql { -IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(TDuration maxTime = TDuration::Zero(), size_t maxRetries = std::numeric_limits<size_t>::max()); // Zero means default maxTime +std::unordered_set<CURLcode> YqlRetriedCurlCodes(); +std::unordered_set<CURLcode> FqRetriedCurlCodes(); + +struct THttpRetryPolicyOptions { + TDuration MaxTime = TDuration::Zero(); // Zero means default maxTime + size_t MaxRetries = std::numeric_limits<size_t>::max(); + std::unordered_set<CURLcode> RetriedCurlCodes = YqlRetriedCurlCodes(); +}; + +IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(THttpRetryPolicyOptions&& options = {}); + +IHTTPGateway::TRetryPolicy::TPtr GetHTTPDefaultRetryPolicy(TDuration maxTime, size_t maxRetries = std::numeric_limits<size_t>::max()); // Zero means default maxTime } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp index 46f997504e..5b8f95449f 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp @@ -231,7 +231,7 @@ public: , CredentialsFactory(credentialsFactory) , ExternalEffect(externalEffect) , ActorSystem(NActors::TActivationContext::ActorSystem()) - , RetryPolicy(NYql::GetHTTPDefaultRetryPolicy(TDuration::Zero(), 3)) + , RetryPolicy(NYql::GetHTTPDefaultRetryPolicy(NYql::THttpRetryPolicyOptions{.MaxRetries = 3, .RetriedCurlCodes = NYql::FqRetriedCurlCodes()})) , RetryCount(GLOBAL_RETRY_LIMIT) { // ^^^ 3 retries in HTTP GW per operation // up to 100 retries at app level for all operations ^^^ |