diff options
author | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
commit | fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch) | |
tree | b8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /yt/cpp/mapreduce/client/client_reader.cpp | |
parent | 7bf166b1a7ed0af927f230022b245af618e998c1 (diff) | |
download | ydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz |
YT-19324: move YT provider to ydb/library/yql
This commit is formed by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text.
This commit has zero effect on all projects that depend on YQL.
The summary of changes:
- `yql/providers/yt -> ydb/library/yql/providers/yt `- the whole implementation of YT provider is moved into YDB code base for further export as a part of YT YQL plugin shared library;
- `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without implementation and the description of stat expr nodes;
- `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`;
- `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`;
- `yql/core` is gone;
- `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`.
**NB**: all new targets inside `ydb/` are under `IF (NOT CMAKE_EXPORT)` clause which disables them from open-source cmake generation and ya make build. They will be enabled in the subsequent commits.
Diffstat (limited to 'yt/cpp/mapreduce/client/client_reader.cpp')
-rw-r--r-- | yt/cpp/mapreduce/client/client_reader.cpp | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/yt/cpp/mapreduce/client/client_reader.cpp b/yt/cpp/mapreduce/client/client_reader.cpp new file mode 100644 index 0000000000..80759b12dc --- /dev/null +++ b/yt/cpp/mapreduce/client/client_reader.cpp @@ -0,0 +1,232 @@ +#include "client_reader.h" + +#include "structured_table_formats.h" +#include "transaction.h" +#include "transaction_pinger.h" + +#include <yt/cpp/mapreduce/common/helpers.h> +#include <yt/cpp/mapreduce/common/retry_lib.h> +#include <yt/cpp/mapreduce/common/wait_proxy.h> + +#include <yt/cpp/mapreduce/interface/config.h> +#include <yt/cpp/mapreduce/interface/tvm.h> + +#include <yt/cpp/mapreduce/interface/logging/yt_log.h> + +#include <yt/cpp/mapreduce/io/helpers.h> +#include <yt/cpp/mapreduce/io/yamr_table_reader.h> + +#include <yt/cpp/mapreduce/http/helpers.h> +#include <yt/cpp/mapreduce/http/requests.h> +#include <yt/cpp/mapreduce/http/retry_request.h> + +#include <yt/cpp/mapreduce/raw_client/raw_requests.h> + +#include <library/cpp/yson/node/serialize.h> + +#include <util/random/random.h> +#include <util/stream/file.h> +#include <util/stream/str.h> +#include <util/string/builder.h> +#include <util/string/cast.h> + +namespace NYT { + +using ::ToString; + +//////////////////////////////////////////////////////////////////////////////// + +TClientReader::TClientReader( + const TRichYPath& path, + IClientRetryPolicyPtr clientRetryPolicy, + ITransactionPingerPtr transactionPinger, + const TClientContext& context, + const TTransactionId& transactionId, + const TFormat& format, + const TTableReaderOptions& options, + bool useFormatFromTableAttributes) + : Path_(path) + , ClientRetryPolicy_(std::move(clientRetryPolicy)) + , Context_(context) + , ParentTransactionId_(transactionId) + , Format_(format) + , Options_(options) + , ReadTransaction_(nullptr) +{ + if (options.CreateTransaction_) { + Y_VERIFY(transactionPinger, "Internal error: transactionPinger is null"); + ReadTransaction_ = MakeHolder<TPingableTransaction>( + ClientRetryPolicy_, + Context_, + transactionId, + transactionPinger->GetChildTxPinger(), + TStartTransactionOptions()); + Path_.Path(Snapshot( + ClientRetryPolicy_, + Context_, + ReadTransaction_->GetId(), + path.Path_)); + } + + if (useFormatFromTableAttributes) { + auto transactionId2 = ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_; + auto newFormat = GetTableFormat(ClientRetryPolicy_, Context_, transactionId2, Path_); + if (newFormat) { + Format_->Config = *newFormat; + } + } + + TransformYPath(); + CreateRequest(); +} + +bool TClientReader::Retry( + const TMaybe<ui32>& rangeIndex, + const TMaybe<ui64>& rowIndex) +{ + if (CurrentRequestRetryPolicy_) { + // TODO we should pass actual exception in Retry function + yexception genericError; + auto backoff = CurrentRequestRetryPolicy_->OnGenericError(genericError); + if (!backoff) { + return false; + } + } + + try { + CreateRequest(rangeIndex, rowIndex); + return true; + } catch (const std::exception& ex) { + YT_LOG_ERROR("Client reader retry failed: %v", + ex.what()); + + return false; + } +} + +void TClientReader::ResetRetries() +{ + CurrentRequestRetryPolicy_ = nullptr; +} + +size_t TClientReader::DoRead(void* buf, size_t len) +{ + return Input_->Read(buf, len); +} + +void TClientReader::TransformYPath() +{ + for (auto& range : Path_.MutableRangesView()) { + auto& exact = range.Exact_; + if (IsTrivial(exact)) { + continue; + } + + if (exact.RowIndex_) { + range.LowerLimit(TReadLimit().RowIndex(*exact.RowIndex_)); + range.UpperLimit(TReadLimit().RowIndex(*exact.RowIndex_ + 1)); + exact.RowIndex_.Clear(); + + } else if (exact.Key_) { + range.LowerLimit(TReadLimit().Key(*exact.Key_)); + + auto lastPart = TNode::CreateEntity(); + lastPart.Attributes() = TNode()("type", "max"); + exact.Key_->Parts_.push_back(lastPart); + + range.UpperLimit(TReadLimit().Key(*exact.Key_)); + exact.Key_.Clear(); + } + } +} + +void TClientReader::CreateRequest(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex) +{ + if (!CurrentRequestRetryPolicy_) { + CurrentRequestRetryPolicy_ = ClientRetryPolicy_->CreatePolicyForGenericRequest(); + } + while (true) { + CurrentRequestRetryPolicy_->NotifyNewAttempt(); + + THttpHeader header("GET", GetReadTableCommand(Context_.Config->ApiVersion)); + if (Context_.ServiceTicketAuth) { + header.SetServiceTicket(Context_.ServiceTicketAuth->Ptr->IssueServiceTicket()); + } else { + header.SetToken(Context_.Token); + } + auto transactionId = (ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_); + header.AddTransactionId(transactionId); + + const auto& controlAttributes = Options_.ControlAttributes_; + header.AddParameter("control_attributes", TNode() + ("enable_row_index", controlAttributes.EnableRowIndex_) + ("enable_range_index", controlAttributes.EnableRangeIndex_)); + header.SetOutputFormat(Format_); + + header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding)); + + if (rowIndex.Defined()) { + auto& ranges = Path_.MutableRanges(); + if (ranges.Empty()) { + ranges.ConstructInPlace(TVector{TReadRange()}); + } else { + if (rangeIndex.GetOrElse(0) >= ranges->size()) { + ythrow yexception() + << "range index " << rangeIndex.GetOrElse(0) + << " is out of range, input range count is " << ranges->size(); + } + ranges->erase(ranges->begin(), ranges->begin() + rangeIndex.GetOrElse(0)); + } + ranges->begin()->LowerLimit(TReadLimit().RowIndex(*rowIndex)); + } + + header.MergeParameters(FormIORequestParameters(Path_, Options_)); + + auto requestId = CreateGuidAsString(); + + try { + const auto proxyName = GetProxyForHeavyRequest(Context_); + Response_ = Context_.HttpClient->Request(GetFullUrl(proxyName, Context_, header), requestId, header); + + Input_ = Response_->GetResponseStream(); + + YT_LOG_DEBUG("RSP %v - table stream", requestId); + + return; + } catch (const TErrorResponse& e) { + LogRequestError( + requestId, + header, + e.what(), + CurrentRequestRetryPolicy_->GetAttemptDescription()); + + if (!IsRetriable(e)) { + throw; + } + auto backoff = CurrentRequestRetryPolicy_->OnRetriableError(e); + if (!backoff) { + throw; + } + NDetail::TWaitProxy::Get()->Sleep(*backoff); + } catch (const std::exception& e) { + LogRequestError( + requestId, + header, + e.what(), + CurrentRequestRetryPolicy_->GetAttemptDescription()); + + Response_.reset(); + Input_ = nullptr; + + auto backoff = CurrentRequestRetryPolicy_->OnGenericError(e); + if (!backoff) { + throw; + } + NDetail::TWaitProxy::Get()->Sleep(*backoff); + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT |