aboutsummaryrefslogtreecommitdiffstats
path: root/yt
diff options
context:
space:
mode:
authorAlexander Smirnov <alex@ydb.tech>2024-12-24 22:01:20 +0000
committerAlexander Smirnov <alex@ydb.tech>2024-12-24 22:01:20 +0000
commitbd0e2de0b1035962a4d5b9e847eaa6508fad7fcf (patch)
tree79878ca309f9f7fada064f9b78b4223af4635f28 /yt
parentbe43a4691ebdd4dbe260a8d77df4cd8423b14c05 (diff)
parente6bd80ded127cd064560f7ea471974b602770cb1 (diff)
downloadydb-bd0e2de0b1035962a4d5b9e847eaa6508fad7fcf.tar.gz
Merge branch 'PR'
Diffstat (limited to 'yt')
-rw-r--r--yt/cpp/mapreduce/client/client.cpp26
-rw-r--r--yt/cpp/mapreduce/client/client_reader.cpp105
-rw-r--r--yt/cpp/mapreduce/client/client_reader.h5
-rw-r--r--yt/cpp/mapreduce/client/file_reader.cpp172
-rw-r--r--yt/cpp/mapreduce/client/file_reader.h26
-rw-r--r--yt/cpp/mapreduce/common/retry_lib.cpp5
-rw-r--r--yt/cpp/mapreduce/common/retry_lib.h1
-rw-r--r--yt/cpp/mapreduce/http/http_client.h29
-rw-r--r--yt/cpp/mapreduce/http/retry_request.cpp19
-rw-r--r--yt/cpp/mapreduce/http/retry_request.h2
-rw-r--r--yt/cpp/mapreduce/interface/client_method_options.h9
-rw-r--r--yt/cpp/mapreduce/interface/raw_client.h31
-rw-r--r--yt/cpp/mapreduce/io/helpers.h4
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_client.cpp187
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_client.h24
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_requests.cpp44
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_requests.h9
-rw-r--r--yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp50
-rw-r--r--yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h12
-rw-r--r--yt/python/yt/yson/__init__.py2
-rw-r--r--yt/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp2
-rw-r--r--yt/yql/providers/yt/gateway/native/ut/ya.make6
-rw-r--r--yt/yql/providers/yt/gateway/native/ut/yql_yt_native_folders_ut.cpp366
-rw-r--r--yt/yql/providers/yt/lib/ut_common/ya.make16
-rw-r--r--yt/yql/providers/yt/lib/ut_common/yql_ut_common.cpp55
-rw-r--r--yt/yql/providers/yt/lib/ut_common/yql_ut_common.h23
-rw-r--r--yt/yql/providers/yt/provider/ut/ya.make3
-rw-r--r--yt/yt/client/arrow/arrow_row_stream_encoder.cpp1114
-rw-r--r--yt/yt/client/arrow/arrow_row_stream_encoder.h1
-rw-r--r--yt/yt/client/arrow/ya.make2
-rw-r--r--yt/yt/client/driver/proxy_discovery_cache.cpp18
-rw-r--r--yt/yt/client/signature/public.h11
-rw-r--r--yt/yt/client/signature/signature.cpp64
-rw-r--r--yt/yt/client/signature/signature.h45
-rw-r--r--yt/yt/client/signature/unittests/signature_ut.cpp54
-rw-r--r--yt/yt/client/signature/unittests/ya.make (renamed from yt/yt/library/oom/unittests/ya.make)11
-rw-r--r--yt/yt/client/ya.make3
-rw-r--r--yt/yt/core/bus/tcp/config.h6
-rw-r--r--yt/yt/core/bus/tcp/configure_dispatcher.cpp41
-rw-r--r--yt/yt/core/bus/tcp/dispatcher.cpp1
-rw-r--r--yt/yt/core/bus/tcp/public.h4
-rw-r--r--yt/yt/core/concurrency/configure_fiber_manager.cpp41
-rw-r--r--yt/yt/core/concurrency/coroutine.h2
-rw-r--r--yt/yt/core/concurrency/execution_stack.h5
-rw-r--r--yt/yt/core/concurrency/new_fair_share_thread_pool.cpp20
-rw-r--r--yt/yt/core/concurrency/public.h5
-rw-r--r--yt/yt/core/logging/configure_log_manager.cpp51
-rw-r--r--yt/yt/core/logging/public.h5
-rw-r--r--yt/yt/core/misc/configurable_singleton_decl-inl.h33
-rw-r--r--yt/yt/core/misc/configurable_singleton_decl.h16
-rw-r--r--yt/yt/core/misc/configurable_singleton_def-inl.h150
-rw-r--r--yt/yt/core/misc/configurable_singleton_def.cpp151
-rw-r--r--yt/yt/core/misc/configurable_singleton_def.h93
-rw-r--r--yt/yt/core/misc/public.h5
-rw-r--r--yt/yt/core/misc/unittests/configurable_singleton_ut.cpp234
-rw-r--r--yt/yt/core/misc/unittests/ya.make1
-rw-r--r--yt/yt/core/net/address.cpp1
-rw-r--r--yt/yt/core/net/configure_address_resolver.cpp28
-rw-r--r--yt/yt/core/net/public.h6
-rw-r--r--yt/yt/core/rpc/configure_dispatcher.cpp41
-rw-r--r--yt/yt/core/rpc/dispatcher.cpp1
-rw-r--r--yt/yt/core/rpc/grpc/configure_dispatcher.cpp28
-rw-r--r--yt/yt/core/rpc/grpc/public.h4
-rw-r--r--yt/yt/core/rpc/grpc/ya.make1
-rw-r--r--yt/yt/core/rpc/http/server.cpp5
-rw-r--r--yt/yt/core/rpc/public.h4
-rw-r--r--yt/yt/core/service_discovery/yp/configure_service_discovery.cpp30
-rw-r--r--yt/yt/core/service_discovery/yp/public.h3
-rw-r--r--yt/yt/core/service_discovery/yp/ya.make1
-rw-r--r--yt/yt/core/ya.make7
-rw-r--r--yt/yt/core/yson/configure_protobuf_interop.cpp41
-rw-r--r--yt/yt/core/yson/public.h3
-rw-r--r--yt/yt/core/yson/string.h2
-rw-r--r--yt/yt/core/yson/token.h2
-rw-r--r--yt/yt/core/yson/writer.cpp26
-rw-r--r--yt/yt/core/ytree/unittests/text_yson_convert_ut.cpp273
-rw-r--r--yt/yt/core/ytree/unittests/ya.make1
-rw-r--r--yt/yt/core/ytree/ypath_client.h2
-rw-r--r--yt/yt/library/backtrace_introspector/http/handler.cpp87
-rw-r--r--yt/yt/library/backtrace_introspector/http/handler.h20
-rw-r--r--yt/yt/library/backtrace_introspector/http/ya.make16
-rw-r--r--yt/yt/library/backtrace_introspector/introspect.cpp224
-rw-r--r--yt/yt/library/backtrace_introspector/introspect.h57
-rw-r--r--yt/yt/library/backtrace_introspector/introspect_dummy.cpp14
-rw-r--r--yt/yt/library/backtrace_introspector/introspect_linux.cpp217
-rw-r--r--yt/yt/library/backtrace_introspector/private.h16
-rw-r--r--yt/yt/library/backtrace_introspector/public.h12
-rw-r--r--yt/yt/library/backtrace_introspector/unittests/introspect_ut.cpp198
-rw-r--r--yt/yt/library/backtrace_introspector/unittests/ya.make15
-rw-r--r--yt/yt/library/backtrace_introspector/ya.make31
-rw-r--r--yt/yt/library/formats/arrow_parser.cpp22
-rw-r--r--yt/yt/library/formats/unittests/arrow_parser_ut.cpp690
-rw-r--r--yt/yt/library/formats/unittests/dsv_parser_ut.cpp365
-rw-r--r--yt/yt/library/formats/unittests/dsv_writer_ut.cpp316
-rw-r--r--yt/yt/library/formats/unittests/format_writer_ut.h36
-rw-r--r--yt/yt/library/formats/unittests/protobuf_format_ut.cpp4659
-rw-r--r--yt/yt/library/formats/unittests/protobuf_format_ut.proto255
-rw-r--r--yt/yt/library/formats/unittests/row_helpers.cpp70
-rw-r--r--yt/yt/library/formats/unittests/row_helpers.h111
-rw-r--r--yt/yt/library/formats/unittests/schemaful_dsv_parser_ut.cpp248
-rw-r--r--yt/yt/library/formats/unittests/schemaful_dsv_writer_ut.cpp346
-rw-r--r--yt/yt/library/formats/unittests/skiff_format_ut.cpp3028
-rw-r--r--yt/yt/library/formats/unittests/skiff_yson_converter_ut.cpp707
-rw-r--r--yt/yt/library/formats/unittests/value_examples.cpp163
-rw-r--r--yt/yt/library/formats/unittests/value_examples.h24
-rw-r--r--yt/yt/library/formats/unittests/web_json_writer_ut.cpp1714
-rw-r--r--yt/yt/library/formats/unittests/ya.make53
-rw-r--r--yt/yt/library/formats/unittests/yaml_parser_ut.cpp598
-rw-r--r--yt/yt/library/formats/unittests/yaml_writer_ut.cpp319
-rw-r--r--yt/yt/library/formats/unittests/yamr_parser_ut.cpp601
-rw-r--r--yt/yt/library/formats/unittests/yamr_writer_ut.cpp645
-rw-r--r--yt/yt/library/formats/unittests/yamred_dsv_parser_ut.cpp185
-rw-r--r--yt/yt/library/formats/unittests/yamred_dsv_writer_ut.cpp424
-rw-r--r--yt/yt/library/formats/unittests/yson_helpers.cpp29
-rw-r--r--yt/yt/library/formats/unittests/yson_helpers.h (renamed from yt/yt/library/program/private.h)6
-rw-r--r--yt/yt/library/monitoring/http_integration.cpp209
-rw-r--r--yt/yt/library/monitoring/http_integration.h28
-rw-r--r--yt/yt/library/monitoring/monitoring_manager.cpp177
-rw-r--r--yt/yt/library/monitoring/monitoring_manager.h55
-rw-r--r--yt/yt/library/monitoring/private.h15
-rw-r--r--yt/yt/library/monitoring/public.h13
-rw-r--r--yt/yt/library/monitoring/ya.make27
-rw-r--r--yt/yt/library/oom/oom.cpp144
-rw-r--r--yt/yt/library/oom/oom.h21
-rw-r--r--yt/yt/library/oom/unittests/oom_ut.cpp41
-rw-r--r--yt/yt/library/oom/ya.make20
-rw-r--r--yt/yt/library/process/config.cpp34
-rw-r--r--yt/yt/library/process/config.h43
-rw-r--r--yt/yt/library/process/configure_io_dispatcher.cpp41
-rw-r--r--yt/yt/library/process/io_dispatcher.cpp10
-rw-r--r--yt/yt/library/process/io_dispatcher.h19
-rw-r--r--yt/yt/library/process/public.h8
-rw-r--r--yt/yt/library/process/unittests/pipes_ut.cpp432
-rw-r--r--yt/yt/library/process/unittests/process_ut.cpp242
-rw-r--r--yt/yt/library/process/unittests/subprocess_ut.cpp111
-rw-r--r--yt/yt/library/process/unittests/ya.make22
-rw-r--r--yt/yt/library/process/ya.make2
-rw-r--r--yt/yt/library/profiling/resource_tracker/configure_resource_tracker.cpp28
-rw-r--r--yt/yt/library/profiling/resource_tracker/public.h6
-rw-r--r--yt/yt/library/profiling/resource_tracker/resource_tracker.cpp1
-rw-r--r--yt/yt/library/profiling/resource_tracker/ya.make1
-rw-r--r--yt/yt/library/profiling/solomon/config.cpp3
-rw-r--r--yt/yt/library/profiling/solomon/config.h1
-rw-r--r--yt/yt/library/profiling/solomon/exporter.cpp3
-rw-r--r--yt/yt/library/profiling/solomon/helpers.cpp37
-rw-r--r--yt/yt/library/profiling/solomon/helpers.h2
-rw-r--r--yt/yt/library/program/config.cpp61
-rw-r--r--yt/yt/library/program/config.h77
-rw-r--r--yt/yt/library/program/helpers.cpp101
-rw-r--r--yt/yt/library/program/helpers.h4
-rw-r--r--yt/yt/library/program/program.cpp2
-rw-r--r--yt/yt/library/program/program.h2
-rw-r--r--yt/yt/library/program/public.h2
-rw-r--r--yt/yt/library/program/ya.make11
-rw-r--r--yt/yt/library/stockpile/config.cpp61
-rw-r--r--yt/yt/library/stockpile/config.h45
-rw-r--r--yt/yt/library/stockpile/public.h14
-rw-r--r--yt/yt/library/stockpile/ya.make14
-rw-r--r--yt/yt/library/tcmalloc/configure_tcmalloc_manager.cpp36
-rw-r--r--yt/yt/library/tcmalloc/public.h6
-rw-r--r--yt/yt/library/tcmalloc/ya.make1
-rw-r--r--yt/yt/library/tracing/jaeger/configure_tracer.cpp43
-rw-r--r--yt/yt/library/tracing/jaeger/public.h6
-rw-r--r--yt/yt/library/tracing/jaeger/tracer.cpp1
-rw-r--r--yt/yt/library/tracing/jaeger/ya.make4
-rw-r--r--yt/yt/library/tvm/service/unittests/ya.make19
166 files changed, 18342 insertions, 4415 deletions
diff --git a/yt/cpp/mapreduce/client/client.cpp b/yt/cpp/mapreduce/client/client.cpp
index 9e3976b144..9fcb82f5b7 100644
--- a/yt/cpp/mapreduce/client/client.cpp
+++ b/yt/cpp/mapreduce/client/client.cpp
@@ -1352,11 +1352,27 @@ TNode::TListType TClient::SkyShareTable(
const TSkyShareTableOptions& options)
{
CheckShutdown();
- return NRawClient::SkyShareTable(
- ClientRetryPolicy_->CreatePolicyForGenericRequest(),
- Context_,
- tablePaths,
- options);
+
+ // As documented at https://wiki.yandex-team.ru/yt/userdoc/blob_tables/#shag3.sozdajomrazdachu
+ // first request returns HTTP status code 202 (Accepted). And we need retrying until we have 200 (OK).
+ NHttpClient::IHttpResponsePtr response;
+ do {
+ response = RequestWithRetry<NHttpClient::IHttpResponsePtr>(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ [this, &tablePaths, &options] (TMutationId /*mutationId*/) {
+ return RawClient_->SkyShareTable(tablePaths, options);
+ });
+ TWaitProxy::Get()->Sleep(TDuration::Seconds(5));
+ } while (response->GetStatusCode() != 200);
+
+ if (options.KeyColumns_) {
+ return NodeFromJsonString(response->GetResponse())["torrents"].AsList();
+ } else {
+ TNode torrent;
+ torrent["key"] = TNode::CreateList();
+ torrent["rbtorrent"] = response->GetResponse();
+ return TNode::TListType{torrent};
+ }
}
TCheckPermissionResponse TClient::CheckPermission(
diff --git a/yt/cpp/mapreduce/client/client_reader.cpp b/yt/cpp/mapreduce/client/client_reader.cpp
index b312716877..e7538a22da 100644
--- a/yt/cpp/mapreduce/client/client_reader.cpp
+++ b/yt/cpp/mapreduce/client/client_reader.cpp
@@ -166,99 +166,28 @@ void TClientReader::CreateRequest(const TMaybe<ui32>& rangeIndex, const TMaybe<u
CurrentRequestRetryPolicy_ = ClientRetryPolicy_->CreatePolicyForGenericRequest();
}
- bool areRangesUpdated = false;
+ auto transactionId = (ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_);
- while (true) {
- CurrentRequestRetryPolicy_->NotifyNewAttempt();
-
- THttpHeader header("GET", GetReadTableCommand(Context_.Config->ApiVersion));
- if (Context_.ServiceTicketAuth) {
- header.SetServiceTicket(Context_.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ if (rowIndex.Defined()) {
+ auto& ranges = Path_.MutableRanges();
+ if (ranges.Empty()) {
+ ranges.ConstructInPlace(TVector{TReadRange()});
} else {
- header.SetToken(Context_.Token);
- }
-
- if (Context_.ImpersonationUser) {
- header.SetImpersonationUser(*Context_.ImpersonationUser);
- }
-
- auto transactionId = (ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_);
- header.AddTransactionId(transactionId);
-
- const auto& controlAttributes = Options_.ControlAttributes_;
- header.AddParameter("control_attributes", TNode()
- ("enable_row_index", controlAttributes.EnableRowIndex_)
- ("enable_range_index", controlAttributes.EnableRangeIndex_));
- header.SetOutputFormat(Format_);
-
- header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding));
-
- if (rowIndex.Defined() && !areRangesUpdated) {
- auto& ranges = Path_.MutableRanges();
- if (ranges.Empty()) {
- ranges.ConstructInPlace(TVector{TReadRange()});
- } else {
- if (rangeIndex.GetOrElse(0) >= ranges->size()) {
- ythrow yexception()
- << "range index " << rangeIndex.GetOrElse(0)
- << " is out of range, input range count is " << ranges->size();
- }
- ranges->erase(ranges->begin(), ranges->begin() + rangeIndex.GetOrElse(0));
+ if (rangeIndex.GetOrElse(0) >= ranges->size()) {
+ ythrow yexception()
+ << "range index " << rangeIndex.GetOrElse(0)
+ << " is out of range, input range count is " << ranges->size();
}
- ranges->begin()->LowerLimit(TReadLimit().RowIndex(*rowIndex));
- areRangesUpdated = true;
- }
-
- header.MergeParameters(FormIORequestParameters(Path_, Options_));
-
- auto requestId = CreateGuidAsString();
-
- try {
- const auto proxyName = GetProxyForHeavyRequest(Context_);
- UpdateHeaderForProxyIfNeed(proxyName, Context_, header);
- Response_ = Context_.HttpClient->Request(GetFullUrlForProxy(proxyName, Context_, header), requestId, header);
-
- Input_ = Response_->GetResponseStream();
-
- YT_LOG_DEBUG(
- "RSP %v - table stream (RangeIndex: %v, RowIndex: %v)",
- requestId,
- rangeIndex,
- rowIndex);
-
- return;
- } catch (const TErrorResponse& e) {
- LogRequestError(
- requestId,
- header,
- e.what(),
- CurrentRequestRetryPolicy_->GetAttemptDescription());
-
- if (!IsRetriable(e)) {
- throw;
- }
- auto backoff = CurrentRequestRetryPolicy_->OnRetriableError(e);
- if (!backoff) {
- throw;
- }
- NDetail::TWaitProxy::Get()->Sleep(*backoff);
- } catch (const std::exception& e) {
- LogRequestError(
- requestId,
- header,
- e.what(),
- CurrentRequestRetryPolicy_->GetAttemptDescription());
-
- Response_.reset();
- Input_ = nullptr;
-
- auto backoff = CurrentRequestRetryPolicy_->OnGenericError(e);
- if (!backoff) {
- throw;
- }
- NDetail::TWaitProxy::Get()->Sleep(*backoff);
+ ranges->erase(ranges->begin(), ranges->begin() + rangeIndex.GetOrElse(0));
}
+ ranges->begin()->LowerLimit(TReadLimit().RowIndex(*rowIndex));
}
+
+ Input_ = NDetail::RequestWithRetry<std::unique_ptr<IInputStream>>(
+ CurrentRequestRetryPolicy_,
+ [this, &transactionId] (TMutationId /*mutationId*/) {
+ return RawClient_->ReadTable(transactionId, Path_, Format_, Options_);
+ });
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/cpp/mapreduce/client/client_reader.h b/yt/cpp/mapreduce/client/client_reader.h
index 61bc698340..3f73080046 100644
--- a/yt/cpp/mapreduce/client/client_reader.h
+++ b/yt/cpp/mapreduce/client/client_reader.h
@@ -2,8 +2,6 @@
#include <yt/cpp/mapreduce/common/fwd.h>
-#include <yt/cpp/mapreduce/interface/io.h>
-
#include <yt/cpp/mapreduce/http/context.h>
#include <yt/cpp/mapreduce/http/requests.h>
#include <yt/cpp/mapreduce/http/http.h>
@@ -55,8 +53,7 @@ private:
THolder<TPingableTransaction> ReadTransaction_;
- NHttpClient::IHttpResponsePtr Response_;
- IInputStream* Input_;
+ std::unique_ptr<IInputStream> Input_;
IRequestRetryPolicyPtr CurrentRequestRetryPolicy_;
diff --git a/yt/cpp/mapreduce/client/file_reader.cpp b/yt/cpp/mapreduce/client/file_reader.cpp
index 06463d0af2..f88b40e38b 100644
--- a/yt/cpp/mapreduce/client/file_reader.cpp
+++ b/yt/cpp/mapreduce/client/file_reader.cpp
@@ -31,7 +31,7 @@ using ::ToString;
static TMaybe<ui64> GetEndOffset(const TFileReaderOptions& options) {
if (options.Length_) {
- return options.Offset_.GetOrElse(0) + *options.Length_;
+ return options.Offset_ + *options.Length_;
} else {
return Nothing();
}
@@ -46,7 +46,6 @@ TStreamReaderBase::TStreamReaderBase(
const TClientContext& context,
const TTransactionId& transactionId)
: RawClient_(rawClient)
- , Context_(context)
, ClientRetryPolicy_(std::move(clientRetryPolicy))
, ReadTransaction_(MakeHolder<TPingableTransaction>(
RawClient_,
@@ -64,59 +63,26 @@ TYPath TStreamReaderBase::Snapshot(const TYPath& path)
return NYT::Snapshot(RawClient_, ClientRetryPolicy_, ReadTransaction_->GetId(), path);
}
-TString TStreamReaderBase::GetActiveRequestId() const
-{
- if (Response_) {
- return Response_->GetRequestId();;
- } else {
- return "<no-active-request>";
- }
-}
-
size_t TStreamReaderBase::DoRead(void* buf, size_t len)
{
- const int retryCount = Context_.Config->ReadRetryCount;
- for (int attempt = 1; attempt <= retryCount; ++attempt) {
- try {
- if (!Input_) {
- Response_ = Request(Context_, ReadTransaction_->GetId(), CurrentOffset_);
- Input_ = Response_->GetResponseStream();
- }
- if (len == 0) {
- return 0;
- }
- const size_t read = Input_->Read(buf, len);
- CurrentOffset_ += read;
- return read;
- } catch (TErrorResponse& e) {
- YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)",
- GetActiveRequestId(),
- e.what(),
- attempt,
- retryCount);
-
- if (!IsRetriable(e) || attempt == retryCount) {
- throw;
- }
- TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config));
- } catch (std::exception& e) {
- YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)",
- GetActiveRequestId(),
- e.what(),
- attempt,
- retryCount);
-
- // Invalidate connection.
- Response_.reset();
-
- if (attempt == retryCount) {
+ if (len == 0) {
+ return 0;
+ }
+ return RequestWithRetry<size_t>(
+ ClientRetryPolicy_->CreatePolicyForReaderRequest(),
+ [this, &buf, len] (TMutationId /*mutationId*/) {
+ try {
+ if (!Input_) {
+ Input_ = Request(ReadTransaction_->GetId(), CurrentOffset_);
+ }
+ const size_t read = Input_->Read(buf, len);
+ CurrentOffset_ += read;
+ return read;
+ } catch (...) {
+ Input_ = nullptr;
throw;
}
- TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config));
- }
- Input_ = nullptr;
- }
- Y_UNREACHABLE(); // we should either return or throw from loop above
+ });
}
////////////////////////////////////////////////////////////////////////////////
@@ -130,57 +96,25 @@ TFileReader::TFileReader(
const TTransactionId& transactionId,
const TFileReaderOptions& options)
: TStreamReaderBase(rawClient, std::move(clientRetryPolicy), std::move(transactionPinger), context, transactionId)
- , FileReaderOptions_(options)
+ , StartOffset_(options.Offset_)
+ , EndOffset_(GetEndOffset(options))
+ , Options_(options)
, Path_(path)
- , StartOffset_(FileReaderOptions_.Offset_.GetOrElse(0))
- , EndOffset_(GetEndOffset(FileReaderOptions_))
{
Path_.Path_ = TStreamReaderBase::Snapshot(Path_.Path_);
}
-NHttpClient::IHttpResponsePtr TFileReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes)
+std::unique_ptr<IInputStream> TFileReader::Request(const TTransactionId& transactionId, ui64 readBytes)
{
const ui64 currentOffset = StartOffset_ + readBytes;
- TString hostName = GetProxyForHeavyRequest(context);
-
- THttpHeader header("GET", GetReadFileCommand(context.Config->ApiVersion));
- if (context.ServiceTicketAuth) {
- header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
- } else {
- header.SetToken(context.Token);
- }
-
- if (context.ImpersonationUser) {
- header.SetImpersonationUser(*context.ImpersonationUser);
- }
-
- UpdateHeaderForProxyIfNeed(hostName, context, header);
-
- header.AddTransactionId(transactionId);
- header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
if (EndOffset_) {
Y_ABORT_UNLESS(*EndOffset_ >= currentOffset);
- FileReaderOptions_.Length(*EndOffset_ - currentOffset);
- }
- FileReaderOptions_.Offset(currentOffset);
- header.MergeParameters(FormIORequestParameters(Path_, FileReaderOptions_));
-
- header.SetResponseCompression(ToString(context.Config->AcceptEncoding));
-
- auto requestId = CreateGuidAsString();
- NHttpClient::IHttpResponsePtr response;
- try {
- response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
- } catch (const std::exception& ex) {
- LogRequestError(requestId, header, ex.what(), "");
- throw;
+ Options_.Length(*EndOffset_ - currentOffset);
}
- YT_LOG_DEBUG("RSP %v - file stream",
- requestId);
-
- return response;
+ Options_.Offset(currentOffset);
+ return RawClient_->ReadFile(transactionId, Path_, Options_);
}
////////////////////////////////////////////////////////////////////////////////
@@ -195,66 +129,22 @@ TBlobTableReader::TBlobTableReader(
const TTransactionId& transactionId,
const TBlobTableReaderOptions& options)
: TStreamReaderBase(rawClient, std::move(retryPolicy), std::move(transactionPinger), context, transactionId)
+ , StartOffset_(options.Offset_)
, Key_(key)
, Options_(options)
{
Path_ = TStreamReaderBase::Snapshot(path);
}
-NHttpClient::IHttpResponsePtr TBlobTableReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes)
+std::unique_ptr<IInputStream> TBlobTableReader::Request(const TTransactionId& transactionId, ui64 readBytes)
{
- TString hostName = GetProxyForHeavyRequest(context);
-
- THttpHeader header("GET", "read_blob_table");
- if (context.ServiceTicketAuth) {
- header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
- } else {
- header.SetToken(context.Token);
- }
-
- if (context.ImpersonationUser) {
- header.SetImpersonationUser(*context.ImpersonationUser);
- }
-
- UpdateHeaderForProxyIfNeed(hostName, context, header);
-
- header.AddTransactionId(transactionId);
- header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
-
- const ui64 currentOffset = Options_.Offset_ + readBytes;
+ const i64 currentOffset = StartOffset_ + readBytes;
const i64 startPartIndex = currentOffset / Options_.PartSize_;
- const ui64 skipBytes = currentOffset - Options_.PartSize_ * startPartIndex;
- auto lowerLimitKey = Key_;
- lowerLimitKey.Parts_.push_back(startPartIndex);
- auto upperLimitKey = Key_;
- upperLimitKey.Parts_.push_back(std::numeric_limits<i64>::max());
- TNode params = PathToParamNode(TRichYPath(Path_).AddRange(TReadRange()
- .LowerLimit(TReadLimit().Key(lowerLimitKey))
- .UpperLimit(TReadLimit().Key(upperLimitKey))));
- params["start_part_index"] = TNode(startPartIndex);
- params["offset"] = skipBytes;
- if (Options_.PartIndexColumnName_) {
- params["part_index_column_name"] = *Options_.PartIndexColumnName_;
- }
- if (Options_.DataColumnName_) {
- params["data_column_name"] = *Options_.DataColumnName_;
- }
- params["part_size"] = Options_.PartSize_;
- header.MergeParameters(params);
- header.SetResponseCompression(ToString(context.Config->AcceptEncoding));
-
- auto requestId = CreateGuidAsString();
- NHttpClient::IHttpResponsePtr response;
- try {
- response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
- } catch (const std::exception& ex) {
- LogRequestError(requestId, header, ex.what(), "");
- throw;
- }
+ const i64 skipBytes = currentOffset - Options_.PartSize_ * startPartIndex;
- YT_LOG_DEBUG("RSP %v - blob table stream",
- requestId);
- return response;
+ Options_.Offset(skipBytes);
+ Options_.StartPartIndex(startPartIndex);
+ return RawClient_->ReadBlobTable(transactionId, Path_, Key_, Options_);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/cpp/mapreduce/client/file_reader.h b/yt/cpp/mapreduce/client/file_reader.h
index 48248696d3..8aafdc860d 100644
--- a/yt/cpp/mapreduce/client/file_reader.h
+++ b/yt/cpp/mapreduce/client/file_reader.h
@@ -11,7 +11,6 @@ class IInputStream;
namespace NYT {
-class THttpRequest;
class TPingableTransaction;
namespace NDetail {
@@ -35,19 +34,16 @@ protected:
protected:
const IRawClientPtr RawClient_;
- const TClientContext Context_;
private:
size_t DoRead(void* buf, size_t len) override;
- virtual NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) = 0;
- TString GetActiveRequestId() const;
+ virtual std::unique_ptr<IInputStream> Request(const TTransactionId& transactionId, ui64 readBytes) = 0;
private:
const IClientRetryPolicyPtr ClientRetryPolicy_;
TFileReaderOptions FileReaderOptions_;
- NHttpClient::IHttpResponsePtr Response_;
- IInputStream* Input_ = nullptr;
+ std::unique_ptr<IInputStream> Input_;
THolder<TPingableTransaction> ReadTransaction_;
@@ -67,17 +63,17 @@ public:
ITransactionPingerPtr transactionPinger,
const TClientContext& context,
const TTransactionId& transactionId,
- const TFileReaderOptions& options = TFileReaderOptions());
+ const TFileReaderOptions& options = {});
private:
- NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override;
+ std::unique_ptr<IInputStream> Request(const TTransactionId& transactionId, ui64 readBytes) override;
private:
- TFileReaderOptions FileReaderOptions_;
-
- TRichYPath Path_;
const ui64 StartOffset_;
const TMaybe<ui64> EndOffset_;
+
+ TFileReaderOptions Options_;
+ TRichYPath Path_;
};
////////////////////////////////////////////////////////////////////////////////
@@ -94,14 +90,16 @@ public:
ITransactionPingerPtr transactionPinger,
const TClientContext& context,
const TTransactionId& transactionId,
- const TBlobTableReaderOptions& options);
+ const TBlobTableReaderOptions& options = {});
private:
- NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override;
+ std::unique_ptr<IInputStream> Request(const TTransactionId& transactionId, ui64 readBytes) override;
private:
+ const ui64 StartOffset_;
const TKey Key_;
- const TBlobTableReaderOptions Options_;
+
+ TBlobTableReaderOptions Options_;
TYPath Path_;
};
diff --git a/yt/cpp/mapreduce/common/retry_lib.cpp b/yt/cpp/mapreduce/common/retry_lib.cpp
index 772a2ab0cd..8146eb8b46 100644
--- a/yt/cpp/mapreduce/common/retry_lib.cpp
+++ b/yt/cpp/mapreduce/common/retry_lib.cpp
@@ -118,6 +118,11 @@ public:
return Wrap(MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(Config_->StartOperationRetryCount), Config_));
}
+ IRequestRetryPolicyPtr CreatePolicyForReaderRequest() override
+ {
+ return Wrap(MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(Config_->ReadRetryCount), Config_));
+ }
+
IRequestRetryPolicyPtr Wrap(IRequestRetryPolicyPtr basePolicy)
{
auto config = RetryConfigProvider_->CreateRetryConfig();
diff --git a/yt/cpp/mapreduce/common/retry_lib.h b/yt/cpp/mapreduce/common/retry_lib.h
index c6c061f614..5b406b075f 100644
--- a/yt/cpp/mapreduce/common/retry_lib.h
+++ b/yt/cpp/mapreduce/common/retry_lib.h
@@ -48,6 +48,7 @@ class IClientRetryPolicy
public:
virtual IRequestRetryPolicyPtr CreatePolicyForGenericRequest() = 0;
virtual IRequestRetryPolicyPtr CreatePolicyForStartOperationRequest() = 0;
+ virtual IRequestRetryPolicyPtr CreatePolicyForReaderRequest() = 0;
};
diff --git a/yt/cpp/mapreduce/http/http_client.h b/yt/cpp/mapreduce/http/http_client.h
index 6087eca098..a01b619fab 100644
--- a/yt/cpp/mapreduce/http/http_client.h
+++ b/yt/cpp/mapreduce/http/http_client.h
@@ -42,7 +42,6 @@ public:
virtual IHttpResponsePtr Finish() = 0;
};
-
class IHttpClient
{
public:
@@ -65,6 +64,34 @@ public:
////////////////////////////////////////////////////////////////////////////////
+class THttpResponseStream
+ : public IInputStream
+{
+public:
+ THttpResponseStream(IHttpResponsePtr response)
+ : Response_(std::move(response))
+ {
+ Underlying_ = Response_->GetResponseStream();
+ }
+
+private:
+ size_t DoRead(void *buf, size_t len) override
+ {
+ return Underlying_->Read(buf, len);
+ }
+
+ size_t DoSkip(size_t len) override
+ {
+ return Underlying_->Skip(len);
+ }
+
+private:
+ IHttpResponsePtr Response_;
+ IInputStream* Underlying_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
IHttpClientPtr CreateDefaultHttpClient();
IHttpClientPtr CreateCoreHttpClient(bool useTLS, const TConfigPtr& config);
diff --git a/yt/cpp/mapreduce/http/retry_request.cpp b/yt/cpp/mapreduce/http/retry_request.cpp
index 1d9267009f..a47b2952b1 100644
--- a/yt/cpp/mapreduce/http/retry_request.cpp
+++ b/yt/cpp/mapreduce/http/retry_request.cpp
@@ -20,7 +20,7 @@ namespace NDetail {
////////////////////////////////////////////////////////////////////////////////
-static TResponseInfo Request(
+static NHttpClient::IHttpResponsePtr Request(
const TClientContext& context,
THttpHeader& header,
TMaybe<TStringBuf> body,
@@ -38,16 +38,10 @@ static TResponseInfo Request(
auto url = GetFullUrlForProxy(hostName, context, header);
- auto response = context.HttpClient->Request(url, requestId, config.HttpConfig, header, body);
-
- TResponseInfo result;
- result.RequestId = requestId;
- result.Response = response->GetResponse();
- result.HttpCode = response->GetStatusCode();
- return result;
+ return context.HttpClient->Request(url, requestId, config.HttpConfig, header, body);
}
-TResponseInfo RequestWithoutRetry(
+NHttpClient::IHttpResponsePtr RequestWithoutRetry(
const TClientContext& context,
TMutationId& mutationId,
THttpHeader& header,
@@ -118,7 +112,12 @@ TResponseInfo RetryRequestWithPolicy(
}
}
- return Request(context, header, body, requestId, config);
+ auto response = Request(context, header, body, requestId, config);
+ return TResponseInfo{
+ .RequestId = response->GetRequestId(),
+ .Response = response->GetResponse(),
+ .HttpCode = response->GetStatusCode(),
+ };
} catch (const TErrorResponse& e) {
LogRequestError(requestId, header, e.what(), retryPolicy->GetAttemptDescription());
retryWithSameMutationId = e.IsTransportError();
diff --git a/yt/cpp/mapreduce/http/retry_request.h b/yt/cpp/mapreduce/http/retry_request.h
index 9750d0b541..444ecbbafc 100644
--- a/yt/cpp/mapreduce/http/retry_request.h
+++ b/yt/cpp/mapreduce/http/retry_request.h
@@ -105,7 +105,7 @@ TResponseInfo RetryRequestWithPolicy(
TMaybe<TStringBuf> body = {},
const TRequestConfig& config = TRequestConfig());
-TResponseInfo RequestWithoutRetry(
+NHttpClient::IHttpResponsePtr RequestWithoutRetry(
const TClientContext& context,
TMutationId& mutationId,
THttpHeader& header,
diff --git a/yt/cpp/mapreduce/interface/client_method_options.h b/yt/cpp/mapreduce/interface/client_method_options.h
index 9bfb79753d..d457bf5f43 100644
--- a/yt/cpp/mapreduce/interface/client_method_options.h
+++ b/yt/cpp/mapreduce/interface/client_method_options.h
@@ -287,9 +287,12 @@ struct TBlobTableReaderOptions
///
/// All blob parts except the last part of the blob must be of this size
/// otherwise blob table reader emits error.
- FLUENT_FIELD_DEFAULT(ui64, PartSize, 4 * 1024 * 1024);
+ FLUENT_FIELD_DEFAULT(i64, PartSize, 4 * 1024 * 1024);
- /// @brief Offset from which to start reading
+ /// @brief Part index from which to start reading.
+ FLUENT_FIELD_DEFAULT(i64, StartPartIndex, 0);
+
+ /// @brief Offset from which to start reading.
FLUENT_FIELD_DEFAULT(i64, Offset, 0);
};
@@ -468,7 +471,7 @@ struct TFileReaderOptions
/// @brief Offset to start reading from.
///
/// By default reading is started from the beginning of the file.
- FLUENT_FIELD_OPTION(i64, Offset);
+ FLUENT_FIELD_DEFAULT(i64, Offset, 0);
///
/// @brief Maximum length to read.
diff --git a/yt/cpp/mapreduce/interface/raw_client.h b/yt/cpp/mapreduce/interface/raw_client.h
index 32055e3d00..4994826863 100644
--- a/yt/cpp/mapreduce/interface/raw_client.h
+++ b/yt/cpp/mapreduce/interface/raw_client.h
@@ -8,6 +8,13 @@ namespace NYT {
////////////////////////////////////////////////////////////////////////////////
+namespace NHttpClient {
+ class IHttpResponse;
+ using IHttpResponsePtr = std::unique_ptr<IHttpResponse>;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
class IRawClient
: public virtual TThrRefBase
{
@@ -196,6 +203,18 @@ public:
const TOperationId& operationId,
const TGetJobTraceOptions& options = {}) = 0;
+ // SkyShare
+
+ virtual NHttpClient::IHttpResponsePtr SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options = {}) = 0;
+
+ // Files
+ virtual std::unique_ptr<IInputStream> ReadFile(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TFileReaderOptions& options = {}) = 0;
+
// File cache
virtual TMaybe<TYPath> GetFileFromCache(
@@ -266,6 +285,18 @@ public:
const TYPath& path,
const TAlterTableOptions& options = {}) = 0;
+ virtual std::unique_ptr<IInputStream> ReadTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TMaybe<TFormat>& format,
+ const TTableReaderOptions& options = {}) = 0;
+
+ virtual std::unique_ptr<IInputStream> ReadBlobTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options = {}) = 0;
+
virtual void AlterTableReplica(
TMutationId& mutationId,
const TReplicaId& replicaId,
diff --git a/yt/cpp/mapreduce/io/helpers.h b/yt/cpp/mapreduce/io/helpers.h
index 0733ff417c..0d3ec40ab6 100644
--- a/yt/cpp/mapreduce/io/helpers.h
+++ b/yt/cpp/mapreduce/io/helpers.h
@@ -63,9 +63,7 @@ inline TNode FormIORequestParameters(
if (options.Config_) {
params[TIOOptionsTraits<TTableReaderOptions>::ConfigName] = *options.Config_;
}
- if (options.Offset_) {
- params["offset"] = *options.Offset_;
- }
+ params["offset"] = options.Offset_;
if (options.Length_) {
params["length"] = *options.Length_;
}
diff --git a/yt/cpp/mapreduce/raw_client/raw_client.cpp b/yt/cpp/mapreduce/raw_client/raw_client.cpp
index 71d8d5fba9..65bfa01cea 100644
--- a/yt/cpp/mapreduce/raw_client/raw_client.cpp
+++ b/yt/cpp/mapreduce/raw_client/raw_client.cpp
@@ -14,6 +14,8 @@
#include <yt/cpp/mapreduce/interface/operation.h>
#include <yt/cpp/mapreduce/interface/tvm.h>
+#include <yt/cpp/mapreduce/io/helpers.h>
+
#include <library/cpp/yson/node/node_io.h>
namespace NYT::NDetail {
@@ -32,7 +34,7 @@ TNode THttpRawClient::Get(
TMutationId mutationId;
THttpHeader header("GET", "get");
header.MergeParameters(NRawClient::SerializeParamsForGet(transactionId, Context_.Config->Prefix, path, options));
- return NodeFromYsonString(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return NodeFromYsonString(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TNode THttpRawClient::TryGet(
@@ -61,7 +63,7 @@ void THttpRawClient::Set(
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForSet(transactionId, Context_.Config->Prefix, path, options));
auto body = NodeToYsonString(value);
- RequestWithoutRetry(Context_, mutationId, header, body);
+ RequestWithoutRetry(Context_, mutationId, header, body)->GetResponse();
}
bool THttpRawClient::Exists(
@@ -72,7 +74,7 @@ bool THttpRawClient::Exists(
TMutationId mutationId;
THttpHeader header("GET", "exists");
header.MergeParameters(NRawClient::SerializeParamsForExists(transactionId, Context_.Config->Prefix, path, options));
- return ParseBoolFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseBoolFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
void THttpRawClient::MultisetAttributes(
@@ -86,7 +88,7 @@ void THttpRawClient::MultisetAttributes(
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForMultisetAttributes(transactionId, Context_.Config->Prefix, path, options));
auto body = NodeToYsonString(value);
- RequestWithoutRetry(Context_, mutationId, header, body);
+ RequestWithoutRetry(Context_, mutationId, header, body)->GetResponse();
}
TNodeId THttpRawClient::Create(
@@ -99,7 +101,7 @@ TNodeId THttpRawClient::Create(
THttpHeader header("POST", "create");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForCreate(transactionId, Context_.Config->Prefix, path, type, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TNodeId THttpRawClient::CopyWithoutRetries(
@@ -112,7 +114,7 @@ TNodeId THttpRawClient::CopyWithoutRetries(
THttpHeader header("POST", "copy");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForCopy(transactionId, Context_.Config->Prefix, sourcePath, destinationPath, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TNodeId THttpRawClient::CopyInsideMasterCell(
@@ -129,7 +131,7 @@ TNodeId THttpRawClient::CopyInsideMasterCell(
// Make cross cell copying disable.
params["enable_cross_cell_copying"] = false;
header.MergeParameters(params);
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TNodeId THttpRawClient::MoveWithoutRetries(
@@ -142,7 +144,7 @@ TNodeId THttpRawClient::MoveWithoutRetries(
THttpHeader header("POST", "move");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForMove(transactionId, Context_.Config->Prefix, sourcePath, destinationPath, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TNodeId THttpRawClient::MoveInsideMasterCell(
@@ -159,7 +161,7 @@ TNodeId THttpRawClient::MoveInsideMasterCell(
// Make cross cell copying disable.
params["enable_cross_cell_copying"] = false;
header.MergeParameters(params);
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
void THttpRawClient::Remove(
@@ -171,7 +173,7 @@ void THttpRawClient::Remove(
THttpHeader header("POST", "remove");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForRemove(transactionId, Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
TNode::TListType THttpRawClient::List(
@@ -190,7 +192,7 @@ TNode::TListType THttpRawClient::List(
}
header.MergeParameters(NRawClient::SerializeParamsForList(transactionId, Context_.Config->Prefix, updatedPath, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NodeFromYsonString(responseInfo.Response).AsList();
+ return NodeFromYsonString(responseInfo->GetResponse()).AsList();
}
TNodeId THttpRawClient::Link(
@@ -203,7 +205,7 @@ TNodeId THttpRawClient::Link(
THttpHeader header("POST", "link");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForLink(transactionId, Context_.Config->Prefix, targetPath, linkPath, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
TLockId THttpRawClient::Lock(
@@ -216,7 +218,7 @@ TLockId THttpRawClient::Lock(
THttpHeader header("POST", "lock");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForLock(transactionId, Context_.Config->Prefix, path, mode, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
void THttpRawClient::Unlock(
@@ -228,7 +230,7 @@ void THttpRawClient::Unlock(
THttpHeader header("POST", "unlock");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForUnlock(transactionId, Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::Concatenate(
@@ -241,7 +243,7 @@ void THttpRawClient::Concatenate(
THttpHeader header("POST", "concatenate");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForConcatenate(transactionId, Context_.Config->Prefix, sourcePaths, destinationPath, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
TTransactionId THttpRawClient::StartTransaction(
@@ -252,7 +254,7 @@ TTransactionId THttpRawClient::StartTransaction(
THttpHeader header("POST", "start_tx");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForStartTransaction(parentTransactionId, Context_.Config->TxTimeout, options));
- return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header).Response);
+ return ParseGuidFromResponse(RequestWithoutRetry(Context_, mutationId, header)->GetResponse());
}
void THttpRawClient::PingTransaction(const TTransactionId& transactionId)
@@ -264,7 +266,7 @@ void THttpRawClient::PingTransaction(const TTransactionId& transactionId)
requestConfig.HttpConfig = NHttpClient::THttpConfig{
.SocketTimeout = Context_.Config->PingTimeout
};
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::AbortTransaction(
@@ -274,7 +276,7 @@ void THttpRawClient::AbortTransaction(
THttpHeader header("POST", "abort_tx");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForAbortTransaction(transactionId));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::CommitTransaction(
@@ -284,7 +286,7 @@ void THttpRawClient::CommitTransaction(
THttpHeader header("POST", "commit_tx");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForCommitTransaction(transactionId));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
TOperationAttributes THttpRawClient::GetOperation(
@@ -295,7 +297,7 @@ TOperationAttributes THttpRawClient::GetOperation(
THttpHeader header("GET", "get_operation");
header.MergeParameters(NRawClient::SerializeParamsForGetOperation(operationId, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NRawClient::ParseOperationAttributes(NodeFromYsonString(responseInfo.Response));
+ return NRawClient::ParseOperationAttributes(NodeFromYsonString(responseInfo->GetResponse()));
}
TOperationAttributes THttpRawClient::GetOperation(
@@ -306,7 +308,7 @@ TOperationAttributes THttpRawClient::GetOperation(
THttpHeader header("GET", "get_operation");
header.MergeParameters(NRawClient::SerializeParamsForGetOperation(alias, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NRawClient::ParseOperationAttributes(NodeFromYsonString(responseInfo.Response));
+ return NRawClient::ParseOperationAttributes(NodeFromYsonString(responseInfo->GetResponse()));
}
void THttpRawClient::AbortOperation(
@@ -316,7 +318,7 @@ void THttpRawClient::AbortOperation(
THttpHeader header("POST", "abort_op");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForAbortOperation(operationId));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::CompleteOperation(
@@ -326,7 +328,7 @@ void THttpRawClient::CompleteOperation(
THttpHeader header("POST", "complete_op");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForCompleteOperation(operationId));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::SuspendOperation(
@@ -337,7 +339,7 @@ void THttpRawClient::SuspendOperation(
THttpHeader header("POST", "suspend_op");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForSuspendOperation(operationId, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::ResumeOperation(
@@ -348,7 +350,7 @@ void THttpRawClient::ResumeOperation(
THttpHeader header("POST", "resume_op");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForResumeOperation(operationId, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
template <typename TKey>
@@ -367,7 +369,7 @@ TListOperationsResult THttpRawClient::ListOperations(const TListOperationsOption
THttpHeader header("GET", "list_operations");
header.MergeParameters(NRawClient::SerializeParamsForListOperations(options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- auto resultNode = NodeFromYsonString(responseInfo.Response);
+ auto resultNode = NodeFromYsonString(responseInfo->GetResponse());
const auto& operationNodesList = resultNode["operations"].AsList();
@@ -417,7 +419,7 @@ NYson::TYsonString THttpRawClient::GetJob(
THttpHeader header("GET", "get_job");
header.MergeParameters(NRawClient::SerializeParamsForGetJob(operationId, jobId, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NYson::TYsonString(responseInfo.Response);
+ return NYson::TYsonString(responseInfo->GetResponse());
}
TListJobsResult THttpRawClient::ListJobs(
@@ -428,7 +430,7 @@ TListJobsResult THttpRawClient::ListJobs(
THttpHeader header("GET", "list_jobs");
header.MergeParameters(NRawClient::SerializeParamsForListJobs(operationId, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- auto resultNode = NodeFromYsonString(responseInfo.Response);
+ auto resultNode = NodeFromYsonString(responseInfo->GetResponse());
const auto& jobNodesList = resultNode["jobs"].AsList();
@@ -524,7 +526,7 @@ TString THttpRawClient::GetJobStderrWithRetries(
TRequestConfig config;
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, {}, config);
- return responseInfo.Response;
+ return responseInfo->GetResponse();
}
IFileReaderPtr THttpRawClient::GetJobStderr(
@@ -573,7 +575,7 @@ std::vector<TJobTraceEvent> THttpRawClient::GetJobTrace(
THttpHeader header("GET", "get_job_trace");
header.MergeParameters(NRawClient::SerializeParamsForGetJobTrace(operationId, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- auto resultNode = NodeFromYsonString(responseInfo.Response);
+ auto resultNode = NodeFromYsonString(responseInfo->GetResponse());
const auto& traceEventNodesList = resultNode.AsList();
@@ -586,6 +588,50 @@ std::vector<TJobTraceEvent> THttpRawClient::GetJobTrace(
return result;
}
+NHttpClient::IHttpResponsePtr THttpRawClient::SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options)
+{
+ TMutationId mutationId;
+ THttpHeader header("POST", "api/v1/share", /*IsApi*/ false);
+
+ auto proxyName = Context_.ServerName.substr(0, Context_.ServerName.find('.'));
+
+ auto host = Context_.Config->SkynetApiHost;
+ if (host == "") {
+ host = "skynet." + proxyName + ".yt.yandex.net";
+ }
+
+ TSkyShareTableOptions patchedOptions = options;
+
+ if (Context_.Config->Pool && !patchedOptions.Pool_) {
+ patchedOptions.Pool(Context_.Config->Pool);
+ }
+
+ header.MergeParameters(NRawClient::SerializeParamsForSkyShareTable(proxyName, Context_.Config->Prefix, tablePaths, patchedOptions));
+ TClientContext skyApiHost({.ServerName = host, .HttpClient = NHttpClient::CreateDefaultHttpClient()});
+
+ return RequestWithoutRetry(skyApiHost, mutationId, header, "");
+}
+
+std::unique_ptr<IInputStream> THttpRawClient::ReadFile(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TFileReaderOptions& options)
+{
+ TMutationId mutationId;
+ THttpHeader header("GET", GetReadFileCommand(Context_.Config->ApiVersion));
+ header.AddTransactionId(transactionId);
+ header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
+ header.MergeParameters(FormIORequestParameters(path, options));
+ header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding));
+
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
+ return std::make_unique<NHttpClient::THttpResponseStream>(std::move(responseInfo));
+}
+
TMaybe<TYPath> THttpRawClient::GetFileFromCache(
const TTransactionId& transactionId,
const TString& md5Signature,
@@ -596,7 +642,7 @@ TMaybe<TYPath> THttpRawClient::GetFileFromCache(
THttpHeader header("GET", "get_file_from_cache");
header.MergeParameters(NRawClient::SerializeParamsForGetFileFromCache(transactionId, md5Signature, cachePath, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- auto resultNode = NodeFromYsonString(responseInfo.Response).AsString();
+ auto resultNode = NodeFromYsonString(responseInfo->GetResponse()).AsString();
return resultNode.empty() ? Nothing() : TMaybe<TYPath>(resultNode);
}
@@ -611,7 +657,7 @@ TYPath THttpRawClient::PutFileToCache(
THttpHeader header("POST", "put_file_to_cache");
header.MergeParameters(NRawClient::SerializeParamsForPutFileToCache(transactionId, Context_.Config->Prefix, filePath, md5Signature, cachePath, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NodeFromYsonString(responseInfo.Response).AsString();
+ return NodeFromYsonString(responseInfo->GetResponse()).AsString();
}
void THttpRawClient::MountTable(
@@ -626,7 +672,7 @@ void THttpRawClient::MountTable(
header.AddParameter("cell_id", GetGuidAsString(*options.CellId_));
}
header.AddParameter("freeze", options.Freeze_);
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::UnmountTable(
@@ -638,7 +684,7 @@ void THttpRawClient::UnmountTable(
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeTabletParams(Context_.Config->Prefix, path, options));
header.AddParameter("force", options.Force_);
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::RemountTable(
@@ -649,7 +695,7 @@ void THttpRawClient::RemountTable(
THttpHeader header("POST", "remount_table");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeTabletParams(Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::ReshardTableByPivotKeys(
@@ -662,7 +708,7 @@ void THttpRawClient::ReshardTableByPivotKeys(
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeTabletParams(Context_.Config->Prefix, path, options));
header.AddParameter("pivot_keys", BuildYsonNodeFluently().List(keys));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::ReshardTableByTabletCount(
@@ -675,7 +721,7 @@ void THttpRawClient::ReshardTableByTabletCount(
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeTabletParams(Context_.Config->Prefix, path, options));
header.AddParameter("tablet_count", tabletCount);
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::InsertRows(
@@ -690,7 +736,7 @@ void THttpRawClient::InsertRows(
auto body = NodeListToYsonString(rows);
TRequestConfig config;
config.IsHeavy = true;
- RequestWithoutRetry(Context_, mutationId, header, body, config);
+ RequestWithoutRetry(Context_, mutationId, header, body, config)->GetResponse();
}
void THttpRawClient::TrimRows(
@@ -706,7 +752,7 @@ void THttpRawClient::TrimRows(
header.MergeParameters(NRawClient::SerializeParametersForTrimRows(Context_.Config->Prefix, path, options));
TRequestConfig config;
config.IsHeavy = true;
- RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
+ RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config)->GetResponse();
}
TNode::TListType THttpRawClient::LookupRows(
@@ -737,7 +783,7 @@ TNode::TListType THttpRawClient::LookupRows(
TRequestConfig config;
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, body, config);
- return NodeFromYsonString(responseInfo.Response, ::NYson::EYsonType::ListFragment).AsList();
+ return NodeFromYsonString(responseInfo->GetResponse(), ::NYson::EYsonType::ListFragment).AsList();
}
TNode::TListType THttpRawClient::SelectRows(
@@ -769,7 +815,44 @@ TNode::TListType THttpRawClient::SelectRows(
TRequestConfig config;
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
- return NodeFromYsonString(responseInfo.Response, ::NYson::EYsonType::ListFragment).AsList();
+ return NodeFromYsonString(responseInfo->GetResponse(), ::NYson::EYsonType::ListFragment).AsList();
+}
+
+std::unique_ptr<IInputStream> THttpRawClient::ReadTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TMaybe<TFormat>& format,
+ const TTableReaderOptions& options)
+{
+ TMutationId mutationId;
+ THttpHeader header("GET", GetReadTableCommand(Context_.Config->ApiVersion));
+ header.SetOutputFormat(format);
+ header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding));
+ header.MergeParameters(NRawClient::SerializeParamsForReadTable(transactionId, Context_.Config->Prefix, path, options));
+ header.MergeParameters(FormIORequestParameters(path, options));
+
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
+ return std::make_unique<NHttpClient::THttpResponseStream>(std::move(responseInfo));
+}
+
+std::unique_ptr<IInputStream> THttpRawClient::ReadBlobTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options)
+{
+ TMutationId mutationId;
+ THttpHeader header("GET", "read_blob_table");
+ header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
+ header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding));
+ header.MergeParameters(NRawClient::SerializeParamsForReadBlobTable(transactionId, path, key, options));
+
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
+ return std::make_unique<NHttpClient::THttpResponseStream>(std::move(responseInfo));
}
void THttpRawClient::AlterTable(
@@ -781,7 +864,7 @@ void THttpRawClient::AlterTable(
THttpHeader header("POST", "alter_table");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForAlterTable(transactionId, Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::AlterTableReplica(
@@ -792,7 +875,7 @@ void THttpRawClient::AlterTableReplica(
THttpHeader header("POST", "alter_table_replica");
header.AddMutationId();
header.MergeParameters(NRawClient::SerializeParamsForAlterTableReplica(replicaId, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::DeleteRows(
@@ -808,7 +891,7 @@ void THttpRawClient::DeleteRows(
auto body = NodeListToYsonString(keys);
TRequestConfig config;
config.IsHeavy = true;
- RequestWithoutRetry(Context_, mutationId, header, body, config);
+ RequestWithoutRetry(Context_, mutationId, header, body, config)->GetResponse();
}
void THttpRawClient::FreezeTable(
@@ -818,7 +901,7 @@ void THttpRawClient::FreezeTable(
TMutationId mutationId;
THttpHeader header("POST", "freeze_table");
header.MergeParameters(NRawClient::SerializeParamsForFreezeTable(Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
void THttpRawClient::UnfreezeTable(
@@ -828,7 +911,7 @@ void THttpRawClient::UnfreezeTable(
TMutationId mutationId;
THttpHeader header("POST", "unfreeze_table");
header.MergeParameters(NRawClient::SerializeParamsForUnfreezeTable(Context_.Config->Prefix, path, options));
- RequestWithoutRetry(Context_, mutationId, header);
+ RequestWithoutRetry(Context_, mutationId, header)->GetResponse();
}
TCheckPermissionResponse THttpRawClient::CheckPermission(
@@ -841,7 +924,7 @@ TCheckPermissionResponse THttpRawClient::CheckPermission(
THttpHeader header("GET", "check_permission");
header.MergeParameters(NRawClient::SerializeParamsForCheckPermission(user, permission, Context_.Config->Prefix, path, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
- return NRawClient::ParseCheckPermissionResponse(NodeFromYsonString(responseInfo.Response));
+ return NRawClient::ParseCheckPermissionResponse(NodeFromYsonString(responseInfo->GetResponse()));
}
TVector<TTabletInfo> THttpRawClient::GetTabletInfos(
@@ -854,7 +937,7 @@ TVector<TTabletInfo> THttpRawClient::GetTabletInfos(
header.MergeParameters(NRawClient::SerializeParamsForGetTabletInfos(Context_.Config->Prefix, path, tabletIndexes, options));
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header);
TVector<TTabletInfo> result;
- Deserialize(result, *NodeFromYsonString(responseInfo.Response).AsMap().FindPtr("tablets"));
+ Deserialize(result, *NodeFromYsonString(responseInfo->GetResponse()).AsMap().FindPtr("tablets"));
return result;
}
@@ -870,7 +953,7 @@ TVector<TTableColumnarStatistics> THttpRawClient::GetTableColumnarStatistics(
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
TVector<TTableColumnarStatistics> result;
- Deserialize(result, NodeFromYsonString(responseInfo.Response));
+ Deserialize(result, NodeFromYsonString(responseInfo->GetResponse()));
return result;
}
@@ -886,7 +969,7 @@ TMultiTablePartitions THttpRawClient::GetTablePartitions(
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
TMultiTablePartitions result;
- Deserialize(result, NodeFromYsonString(responseInfo.Response));
+ Deserialize(result, NodeFromYsonString(responseInfo->GetResponse()));
return result;
}
@@ -897,7 +980,7 @@ ui64 THttpRawClient::GenerateTimestamp()
TRequestConfig config;
config.IsHeavy = true;
auto responseInfo = RequestWithoutRetry(Context_, mutationId, header, /*body*/ {}, config);
- return NodeFromYsonString(responseInfo.Response).AsUint64();
+ return NodeFromYsonString(responseInfo->GetResponse()).AsUint64();
}
TAuthorizationInfo THttpRawClient::WhoAmI()
@@ -908,7 +991,7 @@ TAuthorizationInfo THttpRawClient::WhoAmI()
TAuthorizationInfo result;
NJson::TJsonValue jsonValue;
- bool ok = NJson::ReadJsonTree(requestResult.Response, &jsonValue, /*throwOnError*/ true);
+ bool ok = NJson::ReadJsonTree(requestResult->GetResponse(), &jsonValue, /*throwOnError*/ true);
Y_ABORT_UNLESS(ok);
result.Login = jsonValue["login"].GetString();
result.Realm = jsonValue["realm"].GetString();
diff --git a/yt/cpp/mapreduce/raw_client/raw_client.h b/yt/cpp/mapreduce/raw_client/raw_client.h
index 08015f024f..e540d1b331 100644
--- a/yt/cpp/mapreduce/raw_client/raw_client.h
+++ b/yt/cpp/mapreduce/raw_client/raw_client.h
@@ -202,6 +202,18 @@ public:
const TOperationId& operationId,
const TGetJobTraceOptions& options = {}) override;
+ // SkyShare
+
+ NHttpClient::IHttpResponsePtr SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options = {}) override;
+
+ // Files
+ std::unique_ptr<IInputStream> ReadFile(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TFileReaderOptions& options = {}) override;
+
// File cache
TMaybe<TYPath> GetFileFromCache(
@@ -266,6 +278,18 @@ public:
const TString& query,
const TSelectRowsOptions& options = {}) override;
+ std::unique_ptr<IInputStream> ReadTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TMaybe<TFormat>& format,
+ const TTableReaderOptions& options = {}) override;
+
+ std::unique_ptr<IInputStream> ReadBlobTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options = {}) override;
+
void AlterTable(
TMutationId& mutationId,
const TTransactionId& transactionId,
diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.cpp b/yt/cpp/mapreduce/raw_client/raw_requests.cpp
index a3f10e6c41..a3f01da6fc 100644
--- a/yt/cpp/mapreduce/raw_client/raw_requests.cpp
+++ b/yt/cpp/mapreduce/raw_client/raw_requests.cpp
@@ -301,50 +301,6 @@ TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node)
return result;
}
-TNode::TListType SkyShareTable(
- const IRequestRetryPolicyPtr& retryPolicy,
- const TClientContext& context,
- const std::vector<TYPath>& tablePaths,
- const TSkyShareTableOptions& options)
-{
- THttpHeader header("POST", "api/v1/share", /*IsApi*/ false);
-
- auto proxyName = context.ServerName.substr(0, context.ServerName.find('.'));
-
- auto host = context.Config->SkynetApiHost;
- if (host == "") {
- host = "skynet." + proxyName + ".yt.yandex.net";
- }
-
- TSkyShareTableOptions patchedOptions = options;
-
- if (context.Config->Pool && !patchedOptions.Pool_) {
- patchedOptions.Pool(context.Config->Pool);
- }
-
- header.MergeParameters(NRawClient::SerializeParamsForSkyShareTable(proxyName, context.Config->Prefix, tablePaths, patchedOptions));
- TClientContext skyApiHost({ .ServerName = host, .HttpClient = NHttpClient::CreateDefaultHttpClient() });
- TResponseInfo response = {};
-
- // As documented at https://wiki.yandex-team.ru/yt/userdoc/blob_tables/#shag3.sozdajomrazdachu
- // first request returns HTTP status code 202 (Accepted). And we need retrying until we have 200 (OK).
- while (response.HttpCode != 200) {
- response = RetryRequestWithPolicy(retryPolicy, skyApiHost, header, "");
- TWaitProxy::Get()->Sleep(TDuration::Seconds(5));
- }
-
- if (options.KeyColumns_) {
- return NodeFromJsonString(response.Response)["torrents"].AsList();
- } else {
- TNode torrent;
-
- torrent["key"] = TNode::CreateList();
- torrent["rbtorrent"] = response.Response;
-
- return TNode::TListType{ torrent };
- }
-}
-
TRichYPath CanonizeYPath(
const IRequestRetryPolicyPtr& retryPolicy,
const TClientContext& context,
diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.h b/yt/cpp/mapreduce/raw_client/raw_requests.h
index c60536c86d..bcc9a4bfd7 100644
--- a/yt/cpp/mapreduce/raw_client/raw_requests.h
+++ b/yt/cpp/mapreduce/raw_client/raw_requests.h
@@ -29,7 +29,6 @@ TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node);
////////////////////////////////////////////////////////////////////////////////
-//
// marks `batchRequest' as executed
void ExecuteBatch(
IRequestRetryPolicyPtr retryPolicy,
@@ -37,14 +36,6 @@ void ExecuteBatch(
TRawBatchRequest& batchRequest,
const TExecuteBatchOptions& options = {});
-// SkyShare
-
-TNode::TListType SkyShareTable(
- const IRequestRetryPolicyPtr& retryPolicy,
- const TClientContext& context,
- const std::vector<TYPath>& tablePaths,
- const TSkyShareTableOptions& options = {});
-
// Misc
TRichYPath CanonizeYPath(
diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp
index 8474bd0edc..2869ddcc0f 100644
--- a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp
+++ b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp
@@ -4,6 +4,7 @@
#include <yt/cpp/mapreduce/interface/config.h>
#include <yt/cpp/mapreduce/interface/client_method_options.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
#include <yt/cpp/mapreduce/interface/operation.h>
#include <yt/cpp/mapreduce/interface/serialize.h>
@@ -639,13 +640,60 @@ TNode SerializeParametersForDeleteRows(
TNode SerializeParametersForTrimRows(
const TString& pathPrefix,
const TYPath& path,
- const TTrimRowsOptions& /* options*/)
+ const TTrimRowsOptions& /*options*/)
{
TNode result;
SetPathParam(&result, pathPrefix, path);
return result;
}
+// Builds request parameters for reading a table: the transaction id plus a
+// `control_attributes` map carrying the row-index and range-index flags from
+// `options`.
+//
+// NOTE(review): `pathPrefix` and `path` are not read here; presumably the
+// caller adds the path to the request separately -- confirm.
+TNode SerializeParamsForReadTable(
+    const TTransactionId& transactionId,
+    const TString& pathPrefix,
+    const TRichYPath& path,
+    const TTableReaderOptions& options)
+{
+    const auto& controlAttributes = options.ControlAttributes_;
+
+    TNode params;
+    SetTransactionIdParam(&params, transactionId);
+    params["control_attributes"] = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("enable_row_index").Value(controlAttributes.EnableRowIndex_)
+            .Item("enable_range_index").Value(controlAttributes.EnableRangeIndex_)
+        .EndMap();
+    return params;
+}
+
+// Builds request parameters for reading a blob table.
+//
+// The path is restricted to a single key range: from `key` extended with
+// `options.StartPartIndex_` up to `key` extended with the maximum i64 value.
+// The transaction id and the blob reader options are then added on top.
+TNode SerializeParamsForReadBlobTable(
+    const TTransactionId& transactionId,
+    const TRichYPath& path,
+    const TKey& key,
+    const TBlobTableReaderOptions& options)
+{
+    // Returns a copy of `key` with one extra trailing part.
+    const auto extendKey = [&key] (auto lastPart) {
+        auto extended = key;
+        extended.Parts_.push_back(lastPart);
+        return extended;
+    };
+    const auto lowerLimitKey = extendKey(options.StartPartIndex_);
+    const auto upperLimitKey = extendKey(std::numeric_limits<i64>::max());
+
+    TNode params = PathToParamNode(
+        TRichYPath(path).
+            AddRange(TReadRange()
+                .LowerLimit(TReadLimit().Key(lowerLimitKey))
+                .UpperLimit(TReadLimit().Key(upperLimitKey))));
+
+    SetTransactionIdParam(&params, transactionId);
+
+    params["start_part_index"] = options.StartPartIndex_;
+    params["offset"] = options.Offset_;
+    // Column names are optional and are sent only when explicitly set.
+    if (options.PartIndexColumnName_) {
+        params["part_index_column_name"] = *options.PartIndexColumnName_;
+    }
+    if (options.DataColumnName_) {
+        params["data_column_name"] = *options.DataColumnName_;
+    }
+    params["part_size"] = options.PartSize_;
+    return params;
+}
+
TNode SerializeParamsForParseYPath(const TRichYPath& path)
{
TNode result;
diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h
index 655198248c..acbf003b5c 100644
--- a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h
+++ b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h
@@ -146,6 +146,18 @@ TNode SerializeParametersForTrimRows(
const TYPath& path,
const TTrimRowsOptions& options);
+// Serializes parameters for reading a table: the transaction id and the
+// `control_attributes` map (row index / range index flags) from `options`.
+// NOTE(review): the current implementation does not read `pathPrefix` or
+// `path` -- confirm the path is supplied to the request elsewhere.
+TNode SerializeParamsForReadTable(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TRichYPath& path,
+ const TTableReaderOptions& options);
+
+// Serializes parameters for reading a blob table: `path` limited to the key
+// range that starts at `key` + options.StartPartIndex_, the transaction id,
+// and the blob reader options (start part index, offset, part size and the
+// optional part-index / data column names).
+TNode SerializeParamsForReadBlobTable(
+ const TTransactionId& transactionId,
+ const TRichYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options);
+
TNode SerializeParamsForParseYPath(
const TRichYPath& path);
diff --git a/yt/python/yt/yson/__init__.py b/yt/python/yt/yson/__init__.py
index 2d5dad9663..ddaec8dd4c 100644
--- a/yt/python/yt/yson/__init__.py
+++ b/yt/python/yt/yson/__init__.py
@@ -54,7 +54,7 @@ except ImportError as error:
print("Warning! Failed to import YSON bindings: " + message, file=_sys.stderr)
try:
- from yt_yson_bindings import upload_parquet, dump_parquet, dump_orc, upload_orc, async_dump_parquet # noqa
+ from yt_yson_bindings import upload_parquet, dump_parquet, dump_orc, upload_orc, async_dump_parquet, async_dump_orc # noqa
HAS_PARQUET = True
except ImportError as error:
message = str(error)
diff --git a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp
index c8c2b61607..8b3019ffa3 100644
--- a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp
+++ b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp
@@ -368,7 +368,7 @@ public:
LocalListeners_.reserve(Inputs_.size());
for (size_t i = 0; i < Inputs_.size(); ++i) {
auto& decoder = Settings_->Specs->Inputs[Settings_->OriginalIndexes[i]];
- bool native = decoder->NativeYtTypeFlags && !decoder->FieldsVec[i].ExplicitYson;
+ bool native = decoder->NativeYtTypeFlags;
LocalListeners_.emplace_back(std::make_shared<TLocalListener>(Listener_, Settings_->ColumnNameMapping, ptr, types, *Settings_->Pool, Settings_->PgBuilder, native, jobStats));
LocalListeners_.back()->Init(LocalListeners_.back());
}
diff --git a/yt/yql/providers/yt/gateway/native/ut/ya.make b/yt/yql/providers/yt/gateway/native/ut/ya.make
index 702a53d5dd..23f262c22b 100644
--- a/yt/yql/providers/yt/gateway/native/ut/ya.make
+++ b/yt/yql/providers/yt/gateway/native/ut/ya.make
@@ -1,5 +1,3 @@
-IF (NOT OPENSOURCE)
-
UNITTEST()
SRCS(
@@ -11,7 +9,7 @@ PEERDIR(
yt/yql/providers/yt/gateway/file
yt/yql/providers/yt/codec/codegen
yt/yql/providers/yt/comp_nodes/llvm14
- yql/essentials/core/ut_common
+ yt/yql/providers/yt/lib/ut_common
library/cpp/testing/mock_server
library/cpp/testing/common
yql/essentials/public/udf/service/terminate_policy
@@ -24,5 +22,3 @@ YQL_LAST_ABI_VERSION()
END()
-ENDIF()
-
diff --git a/yt/yql/providers/yt/gateway/native/ut/yql_yt_native_folders_ut.cpp b/yt/yql/providers/yt/gateway/native/ut/yql_yt_native_folders_ut.cpp
new file mode 100644
index 0000000000..b08db52a50
--- /dev/null
+++ b/yt/yql/providers/yt/gateway/native/ut/yql_yt_native_folders_ut.cpp
@@ -0,0 +1,366 @@
+#include "library/cpp/testing/unittest/registar.h"
+#include <library/cpp/yson/node/node_io.h>
+#include <yt/yql/providers/yt/lib/ut_common/yql_ut_common.h>
+#include <library/cpp/testing/common/network.h>
+#include <library/cpp/testing/mock_server/server.h>
+#include <yt/yql/providers/yt/gateway/native/yql_yt_native.h>
+#include <yql/essentials/core/file_storage/proto/file_storage.pb.h>
+#include <yql/essentials/providers/common/proto/gateways_config.pb.h>
+#include <yt/yql/providers/yt/provider/yql_yt_provider.h>
+
+namespace NYql {
+
+namespace {
+
+// Body the mock server returns for /api/v3/start_tx (a quoted transaction id).
+constexpr auto CYPRES_TX_ID = "\"9518f6d4-f0480586-41103e8-ca595920\"";
+// Mock batch response for `list` on //test/a: two tables ("a", "b"), a valid
+// link, a broken link and a link whose target is //link_access_denied.
+constexpr auto CYPRES_NODE_A_CONTENT = R"(
+[
+ {
+ output = [
+ <
+ "user_attributes" = {};
+ "type" = "table";
+ > "a";
+ <
+ "user_attributes" = {};
+ "type" = "table";
+ > "b";
+ <
+ "user_attributes" = {};
+ "target_path" = "//link_dest";
+ "broken" = %false;
+ "type" = "link";
+ > "link";
+ <
+ "user_attributes" = {};
+ "target_path" = "//link_broken_dest";
+ "broken" = %true;
+ "type" = "link";
+ > "link_broken";
+ <
+ "user_attributes" = {};
+ "target_path" = "//link_access_denied";
+ "broken" = %false;
+ "type" = "link";
+ > "link_access_denied";
+ ];
+ };
+]
+)";
+
+// Mock batch response for `list` that contains a single non-broken link
+// pointing at //link_dest (used by the EmptyResolveIsNotError test).
+constexpr auto CYPRES_NODE_W_LINK = R"(
+[
+ {
+ output = [
+ <
+ "target_path" = "//link_dest";
+ "broken" = %false;
+ "type" = "link";
+ > "link";
+ ];
+ }
+]
+)";
+
+// Mock batch response for `get` on //link_dest: an entity node whose
+// attributes describe a table.
+constexpr auto CYPRES_LINK_DEST = R"(
+[
+ {
+ "output" = <
+ "user_attributes" = {};
+ "type" = "table";
+ > #;
+ };
+]
+)";
+
+// Mock batch response carrying an "Access denied" error (code 901); returned
+// by the `get` handlers for targets the tests treat as inaccessible.
+constexpr auto CYPRES_ACCESS_ERROR = R"(
+[
+ {
+ "error" = {
+ "code" = 901;
+ "message" = "Access denied";
+ }
+ }
+]
+)";
+
+// Mock batch response carrying a "Blackbox rejected token" error (code 111);
+// used as the body of the 401 reply in the GetFolderException test.
+constexpr auto CYPRESS_BLACKBOX_ERROR = R"(
+[
+ {
+ "error" = {
+ "code" = 111;
+ "message" = "Blackbox rejected token";
+ }
+ }
+]
+)";
+
+// Folder items the GetFolder test expects for //test/a. Compared with
+// CYPRES_NODE_A_CONTENT: "link" is reported as a table, "link_broken" is
+// absent, and "link_access_denied" is reported as "unknown" with empty
+// attributes.
+TVector<IYtGateway::TFolderResult::TFolderItem> EXPECTED_ITEMS {
+ {"test/a/a", "table", R"({"user_attributes"={}})"},
+ {"test/a/b", "table", R"({"user_attributes"={}})"},
+ {"test/a/link", "table", R"({"user_attributes"={}})"},
+ {"test/a/link_access_denied", "unknown", "{}"}
+};
+
+// Builds a gateways config whose YT section has a single cluster,
+// "ut_cluster", addressed as localhost:<port>.
+TGatewaysConfig MakeGatewaysConfig(size_t port)
+{
+    TGatewaysConfig gatewaysConfig {};
+
+    // Append an empty cluster entry to the mapping and fill it in place.
+    auto* utCluster = gatewaysConfig.MutableYt()->MutableClusterMapping()->Add();
+    utCluster->SetName("ut_cluster");
+    utCluster->SetYTName("ut_cluster");
+    utCluster->SetCluster("localhost:" + ToString(port));
+
+    return gatewaysConfig;
+}
+
+// Mock-server replier that answers the few YT HTTP API calls the tests need:
+//  * /api/v3/start_tx      -> 200 with CYPRES_TX_ID;
+//  * /api/v3/ping_tx       -> 200 with an empty body;
+//  * /api/v3/execute_batch -> delegated to the `list` / `get` handlers;
+//  * anything else         -> 404.
+class TYtReplier : public TRequestReplier {
+public:
+    // Produces the HTTP response for one batch sub-request, given its path
+    // and its "attributes" parameter (an empty node when absent).
+    using THandler = std::function<THttpResponse(TStringBuf path, const NYT::TNode& attributes)>;
+
+    // `assertion`, when set, is invoked on every batch sub-request before it
+    // is dispatched; otherwise a no-op is used.
+    explicit TYtReplier(THandler handleListCommand, THandler handleGetCommand, TMaybe<std::function<void(const NYT::TNode& request)>> assertion)
+        : HandleListCommand_(handleListCommand)
+        , HandleGetCommand_(handleGetCommand)
+    {
+        if (assertion) {
+            Assertion_ = assertion.GetRef();
+        }
+    }
+
+    bool DoReply(const TReplyParams& params) override {
+        const TParsedHttpFull requestLine(params.Input.FirstLine());
+        Cout << requestLine.Path << Endl;
+
+        if (requestLine.Path == "/api/v3/execute_batch") {
+            HandleExecuteBatch(params.Input).OutTo(params.Output);
+            return true;
+        }
+
+        const bool isStartTx = requestLine.Path == "/api/v3/start_tx";
+        const bool isPingTx = requestLine.Path == "/api/v3/ping_tx";
+
+        THttpResponse reply((isStartTx || isPingTx) ? HTTP_OK : HTTP_NOT_FOUND);
+        reply.SetContent(isStartTx ? TString(CYPRES_TX_ID) : TString());
+        reply.OutTo(params.Output);
+        return true;
+    }
+
+private:
+    // Parses the YSON batch body and answers with the handler result for the
+    // first `list` or `get` sub-request. Replies 500 when "requests" is
+    // missing or is not a list, and 404 when no sub-request is a list/get.
+    THttpResponse HandleExecuteBatch(THttpInput& input) {
+        auto batch = NYT::NodeFromYsonString(input.ReadAll());
+        if (!batch.HasKey("requests") || !batch["requests"].IsList()) {
+            return THttpResponse{HTTP_INTERNAL_SERVER_ERROR};
+        }
+
+        for (auto& subRequest : batch["requests"].AsList()) {
+            Assertion_(subRequest);
+
+            const auto& command = subRequest["command"];
+            const auto& parameters = subRequest["parameters"];
+            const auto& path = parameters["path"].AsString();
+            const auto& attributes = parameters.HasKey("attributes") ? parameters["attributes"] : NYT::TNode{};
+            if (command == "list") {
+                return HandleListCommand_(path, attributes);
+            }
+            if (command == "get") {
+                return HandleGetCommand_(path, attributes);
+            }
+        }
+        return THttpResponse{HTTP_NOT_FOUND};
+    }
+
+    std::function<void(const NYT::TNode& request)> Assertion_ = [] ([[maybe_unused]] auto _) {};
+    THandler HandleListCommand_;
+    THandler HandleGetCommand_;
+
+};
+
+Y_UNIT_TEST_SUITE(YtNativeGateway) {
+
+// Creates a native YT gateway whose only cluster points at localhost:<port>,
+// wraps it in a TYtState bound to `types`, and runs InitializeYtGateway on
+// the pair. Returns {state, gateway}.
+std::pair<TIntrusivePtr<TYtState>, IYtGateway::TPtr> InitTest(const NTesting::TPortHolder& port, TTypeAnnotationContext* types) {
+    TYtNativeServices services;
+    services.Config = std::make_shared<TYtGatewayConfig>(MakeGatewaysConfig(port).GetYt());
+    services.FileStorage = CreateFileStorage(TFileStorageConfig{});
+
+    auto gateway = CreateYtNativeGateway(services);
+
+    auto state = MakeIntrusive<TYtState>(types);
+    state->Gateway = gateway;
+    InitializeYtGateway(gateway, state);
+
+    return {state, gateway};
+}
+
+// Runs IYtGateway::GetFolder against a mock YT server and returns its result.
+//
+// `handleList` / `handleGet` produce the mock replies for batch sub-requests,
+// `gatewayRequestAssertion` (optional) is invoked on every sub-request, and
+// `makeFolderOptions` builds the GetFolder options from the session id.
+// The session is closed before the result is returned.
+IYtGateway::TFolderResult GetFolderResult(TYtReplier::THandler handleList, TYtReplier::THandler handleGet,
+TMaybe<std::function<void(const NYT::TNode& request)>> gatewayRequestAssertion, std::function<IYtGateway::TFolderOptions(TString)> makeFolderOptions) {
+    const auto port = NTesting::GetFreePort();
+
+    // The mock server asks for a fresh replier through this factory.
+    const auto makeReplier = [gatewayRequestAssertion, handleList, handleGet] () {
+        return new TYtReplier(handleList, handleGet, gatewayRequestAssertion);
+    };
+    NMock::TMockServer mockServer{port, makeReplier};
+
+    TTypeAnnotationContext types;
+    auto [state, gateway] = InitTest(port, &types);
+
+    auto folderFuture = gateway->GetFolder(makeFolderOptions(state->SessionId));
+    folderFuture.Wait();
+
+    state->Gateway->CloseSession({state->SessionId});
+    return folderFuture.GetValue();
+}
+
+// Lists //test/a through the gateway and checks that:
+//  * the sub-requests for //test/a and //link_dest ask for exactly the
+//    expected attribute sets;
+//  * the call succeeds and the returned items equal EXPECTED_ITEMS.
+Y_UNIT_TEST(GetFolder) {
+    // Attribute sets the gateway must request, keyed by Cypress path.
+    THashMap<TString, THashSet<TString>> requiredAttributes {
+        {"//test/a", {"type", "broken", "target_path", "user_attributes"}},
+        {"//link_dest", {"type", "user_attributes"}}
+    };
+    const auto checkRequiredAttributes = [&requiredAttributes] (const NYT::TNode& request) {
+        const auto& parameters = request["parameters"];
+        const auto path = parameters["path"].AsString();
+        // Paths without an expectation are not checked.
+        if (!requiredAttributes.contains(path)) {
+            return;
+        }
+
+        const auto& attributes = parameters.HasKey("attributes") ? parameters["attributes"] : NYT::TNode{};
+        THashSet<TString> attributesSet;
+        for (const auto& attribute : attributes.AsList()) {
+            attributesSet.insert(attribute.AsString());
+        }
+        UNIT_ASSERT_VALUES_EQUAL(requiredAttributes[path], attributesSet);
+    };
+
+    // `get`: //link_dest resolves to a table, //link_access_denied yields an
+    // access error, any other path is 404.
+    const auto handleGet = [] (TStringBuf path, const NYT::TNode& /*attributes*/) {
+        const bool isLinkDest = path == "//link_dest";
+        if (!isLinkDest && path != "//link_access_denied") {
+            return THttpResponse{HTTP_NOT_FOUND};
+        }
+        THttpResponse resp{HTTP_OK};
+        resp.SetContent(isLinkDest ? CYPRES_LINK_DEST : CYPRES_ACCESS_ERROR);
+        return resp;
+    };
+
+    // `list`: only //test/a is known.
+    const auto handleList = [] (TStringBuf path, const NYT::TNode& /*attributes*/) {
+        if (path != "//test/a") {
+            return THttpResponse{HTTP_NOT_FOUND};
+        }
+        THttpResponse resp{HTTP_OK};
+        resp.SetContent(CYPRES_NODE_A_CONTENT);
+        return resp;
+    };
+
+    const auto makeFolderOptions = [] (const TString& sessionId) {
+        TYtSettings ytSettings {};
+        IYtGateway::TFolderOptions folderOptions{sessionId};
+        folderOptions.Cluster("ut_cluster")
+            .Config(std::make_shared<TYtSettings>(ytSettings))
+            .Prefix("//test/a")
+            .Attributes({"user_attributes"});
+        return folderOptions;
+    };
+
+    auto folderRes = GetFolderResult(handleList, handleGet, checkRequiredAttributes, makeFolderOptions);
+
+    UNIT_ASSERT_EQUAL_C(folderRes.Success(), true, folderRes.Issues().ToString());
+    UNIT_ASSERT_EQUAL(
+        folderRes.ItemsOrFileLink,
+        (std::variant<TVector<IYtGateway::TFolderResult::TFolderItem>, TFileLinkPtr>(EXPECTED_ITEMS)));
+}
+
+// A folder whose only entry is a link that cannot be resolved (the `get` on
+// its target returns an access error) must still yield a successful result.
+Y_UNIT_TEST(EmptyResolveIsNotError) {
+    // No port is reserved here: GetFolderResult() allocates the mock server
+    // port itself.
+
+    // `list` always returns a single link pointing at //link_dest.
+    const auto handleList = [] (TStringBuf path, const NYT::TNode& attributes) {
+        Y_UNUSED(path);
+        Y_UNUSED(attributes);
+
+        THttpResponse resp{HTTP_OK};
+        resp.SetContent(CYPRES_NODE_W_LINK);
+        return resp;
+    };
+
+    // `get` always answers with the "Access denied" batch error.
+    const auto handleGet = [] (TStringBuf path, const NYT::TNode& attributes) {
+        Y_UNUSED(path);
+        Y_UNUSED(attributes);
+
+        THttpResponse resp{HTTP_OK};
+        resp.SetContent(CYPRES_ACCESS_ERROR);
+        return resp;
+    };
+
+    const auto makeFolderOptions = [] (const TString& sessionId) {
+        IYtGateway::TFolderOptions folderOptions{sessionId};
+        TYtSettings ytSettings {};
+        folderOptions.Cluster("ut_cluster")
+            .Config(std::make_shared<TYtSettings>(ytSettings))
+            .Prefix("//test/a")
+            .Attributes({"user_attributes"});
+        return folderOptions;
+    };
+
+    auto folderRes
+        = GetFolderResult(handleList, handleGet, Nothing(), makeFolderOptions);
+
+    UNIT_ASSERT_EQUAL_C(folderRes.Success(), true, folderRes.Issues().ToString());
+}
+
+// When the batch request is rejected with HTTP 401 and an X-YT-Error header,
+// the folder result must carry issues that mention "Authentication failed".
+Y_UNIT_TEST(GetFolderException) {
+    // No port is reserved here: GetFolderResult() allocates the mock server
+    // port itself.
+
+    // `list` always replies 401 with an authentication error in the header.
+    const auto handleList = [] (TStringBuf path, const NYT::TNode& attributes) {
+        Y_UNUSED(path);
+        Y_UNUSED(attributes);
+
+        THttpResponse resp{HTTP_UNAUTHORIZED};
+        auto header = R"({"code":900,"message":"Authentication failed"})";
+        resp.AddHeader(THttpInputHeader("X-YT-Error", header));
+        resp.SetContent(CYPRESS_BLACKBOX_ERROR);
+        return resp;
+    };
+
+    // `get` replies 200 with an empty body.
+    const auto handleGet = [] (TStringBuf path, const NYT::TNode& attributes) {
+        Y_UNUSED(path);
+        Y_UNUSED(attributes);
+
+        THttpResponse resp{HTTP_OK};
+        resp.SetContent("");
+        return resp;
+    };
+
+    const auto makeFolderOptions = [] (const TString& sessionId) {
+        IYtGateway::TFolderOptions folderOptions{sessionId};
+        TYtSettings ytSettings {};
+        folderOptions.Cluster("ut_cluster")
+            .Config(std::make_shared<TYtSettings>(ytSettings))
+            .Prefix("//test/a")
+            .Attributes({"user_attributes"});
+        return folderOptions;
+    };
+
+    const auto folderRes
+        = GetFolderResult(handleList, handleGet, Nothing(), makeFolderOptions);
+
+    UNIT_ASSERT(!folderRes.Issues().Empty());
+    UNIT_ASSERT_STRING_CONTAINS(folderRes.Issues().ToString(), "Authentication failed");
+}
+}
+
+} // namespace
+
+} // namespace NYql
diff --git a/yt/yql/providers/yt/lib/ut_common/ya.make b/yt/yql/providers/yt/lib/ut_common/ya.make
new file mode 100644
index 0000000000..4084a3d770
--- /dev/null
+++ b/yt/yql/providers/yt/lib/ut_common/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+SRCS(
+ yql_ut_common.cpp
+ yql_ut_common.h
+)
+
+PEERDIR(
+ yql/essentials/core
+ yql/essentials/core/expr_nodes
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
+
diff --git a/yt/yql/providers/yt/lib/ut_common/yql_ut_common.cpp b/yt/yql/providers/yt/lib/ut_common/yql_ut_common.cpp
new file mode 100644
index 0000000000..cef3f2723c
--- /dev/null
+++ b/yt/yql/providers/yt/lib/ut_common/yql_ut_common.cpp
@@ -0,0 +1,55 @@
+#include "yql_ut_common.h"
+
+#include <library/cpp/random_provider/random_provider.h>
+#include <library/cpp/time_provider/time_provider.h>
+
+#include <util/generic/guid.h>
+#include <util/system/user.h>
+#include <util/stream/file.h>
+
+namespace NYql {
+
+// Creates the temporary files behind the "yt.plato.Input" and
+// "yt.plato.Output" test tables and registers both in the mapping. Each table
+// gets a data file with one row of empty key/subkey/value strings, and a
+// sibling "<name>.attr" file with the matching String-typed row spec.
+TTestTablesMapping::TTestTablesMapping()
+    : TmpInput()
+    , TmpInputAttr(TmpInput.Name() + ".attr")
+    , TmpOutput()
+    , TmpOutputAttr(TmpOutput.Name() + ".attr")
+{
+    // Writes the single data row into `data` and the row spec into `attr`.
+    const auto fillTable = [] (TTempFileHandle& data, TTempFileHandle& attr) {
+        TUnbufferedFileOutput dataOut(data);
+        dataOut << "{\"key\"=\"\";\"subkey\"=\"\";\"value\"=\"\"}" << Endl;
+        TUnbufferedFileOutput attrOut(attr);
+        attrOut << "{\"_yql_row_spec\" = {\"Type\" = [\"StructType\";["
+            << "[\"key\";[\"DataType\";\"String\"]];"
+            << "[\"subkey\";[\"DataType\";\"String\"]];"
+            << "[\"value\";[\"DataType\";\"String\"]]"
+            << "]]}}" << Endl;
+    };
+
+    fillTable(TmpInput, TmpInputAttr);
+    insert(std::make_pair("yt.plato.Input", TmpInput.Name()));
+
+    fillTable(TmpOutput, TmpOutputAttr);
+    insert(std::make_pair("yt.plato.Output", TmpOutput.Name()));
+}
+
+// Assigns a freshly generated GUID to ytState->SessionId and opens a gateway
+// session under that id for the current user, with a null progress writer,
+// default operation options and deterministic random/time providers.
+void InitializeYtGateway(IYtGateway::TPtr gateway, TYtState::TPtr ytState) {
+ ytState->SessionId = CreateGuidAsString();
+ gateway->OpenSession(
+ IYtGateway::TOpenSessionOptions(ytState->SessionId)
+ .UserName(GetUsername())
+ .ProgressWriter(&NullProgressWriter)
+ .OperationOptions(TYqlOperationOptions())
+ .RandomProvider(CreateDeterministicRandomProvider(1))
+ .TimeProvider(CreateDeterministicTimeProvider(10000000))
+ );
+}
+
+}
diff --git a/yt/yql/providers/yt/lib/ut_common/yql_ut_common.h b/yt/yql/providers/yt/lib/ut_common/yql_ut_common.h
new file mode 100644
index 0000000000..ddee02690a
--- /dev/null
+++ b/yt/yql/providers/yt/lib/ut_common/yql_ut_common.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <yql/essentials/core/yql_expr_type_annotation.h>
+
+#include <yt/yql/providers/yt/gateway/file/yql_yt_file.h>
+#include <yt/yql/providers/yt/provider/yql_yt_provider.h>
+
+#include <util/system/tempfile.h>
+
+namespace NYql {
+
+// Table-name -> file-path mapping for tests. The constructor creates the
+// temporary files below and registers "yt.plato.Input" / "yt.plato.Output".
+struct TTestTablesMapping: public THashMap<TString, TString> {
+ TTempFileHandle TmpInput; // data file of yt.plato.Input
+ TTempFileHandle TmpInputAttr; // "<TmpInput name>.attr" file with the row spec
+ TTempFileHandle TmpOutput; // data file of yt.plato.Output
+ TTempFileHandle TmpOutputAttr; // "<TmpOutput name>.attr" file with the row spec
+
+ TTestTablesMapping();
+};
+
+// Generates a new session id, stores it in ytState->SessionId and opens a
+// session with that id on `gateway`.
+void InitializeYtGateway(IYtGateway::TPtr gateway, TYtState::TPtr ytState);
+
+}
diff --git a/yt/yql/providers/yt/provider/ut/ya.make b/yt/yql/providers/yt/provider/ut/ya.make
index 3b29f30999..888bfe2d25 100644
--- a/yt/yql/providers/yt/provider/ut/ya.make
+++ b/yt/yql/providers/yt/provider/ut/ya.make
@@ -17,7 +17,7 @@ PEERDIR(
yt/yql/providers/yt/gateway/file
yt/yql/providers/yt/codec/codegen
yt/yql/providers/yt/comp_nodes/llvm14
- yql/essentials/core/ut_common
+ yt/yql/providers/yt/lib/ut_common
yql/essentials/ast
yql/essentials/public/udf/service/terminate_policy
yql/essentials/core/services
@@ -38,3 +38,4 @@ YQL_LAST_ABI_VERSION()
END()
ENDIF()
+
diff --git a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
index 25a403790c..1d266f3c71 100644
--- a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
+++ b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
@@ -1,8 +1,5 @@
#include "arrow_row_stream_encoder.h"
-#include <yt/yt/client/arrow/fbs/Message.fbs.h>
-#include <yt/yt/client/arrow/fbs/Schema.fbs.h>
-
#include <yt/yt/client/api/rpc_proxy/row_stream.h>
#include <yt/yt/client/api/rpc_proxy/wire_row_stream.h>
@@ -14,6 +11,8 @@
#include <yt/yt/client/table_client/schema.h>
#include <yt/yt/client/table_client/columnar.h>
+#include <yt/yt/library/formats/format.h>
+
#include <yt/yt/core/misc/error.h>
#include <yt/yt/core/misc/range.h>
@@ -32,651 +31,6 @@ static constexpr auto& Logger = ArrowLogger;
namespace {
-using TBatchColumn = IUnversionedColumnarRowBatch::TColumn;
-using TBodyWriter = std::function<void(TMutableRef)>;
-
-constexpr i64 ArrowAlignment = 8;
-
-flatbuffers::Offset<flatbuffers::String> SerializeString(
- flatbuffers::FlatBufferBuilder* flatbufBuilder,
- const std::string& str)
-{
- return flatbufBuilder->CreateString(str.data(), str.length());
-}
-
-std::tuple<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>> SerializeColumnType(
- flatbuffers::FlatBufferBuilder* flatbufBuilder,
- const TColumnSchema& schema)
-{
- auto simpleType = CastToV1Type(schema.LogicalType()).first;
- switch (simpleType) {
- case ESimpleLogicalValueType::Null:
- case ESimpleLogicalValueType::Void:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_Null,
- org::apache::arrow::flatbuf::CreateNull(*flatbufBuilder)
- .Union());
-
- case ESimpleLogicalValueType::Int64:
- case ESimpleLogicalValueType::Uint64:
- case ESimpleLogicalValueType::Int8:
- case ESimpleLogicalValueType::Uint8:
- case ESimpleLogicalValueType::Int16:
- case ESimpleLogicalValueType::Uint16:
- case ESimpleLogicalValueType::Int32:
- case ESimpleLogicalValueType::Uint32:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_Int,
- org::apache::arrow::flatbuf::CreateInt(
- *flatbufBuilder,
- GetIntegralTypeBitWidth(simpleType),
- IsIntegralTypeSigned(simpleType)).Union());
-
- case ESimpleLogicalValueType::Double:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_FloatingPoint,
- org::apache::arrow::flatbuf::CreateFloatingPoint(
- *flatbufBuilder,
- org::apache::arrow::flatbuf::Precision_DOUBLE)
- .Union());
-
- case ESimpleLogicalValueType::Float:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_FloatingPoint,
- org::apache::arrow::flatbuf::CreateFloatingPoint(
- *flatbufBuilder,
- org::apache::arrow::flatbuf::Precision_SINGLE)
- .Union());
-
- case ESimpleLogicalValueType::Boolean:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_Bool,
- org::apache::arrow::flatbuf::CreateBool(*flatbufBuilder)
- .Union());
-
- case ESimpleLogicalValueType::String:
- case ESimpleLogicalValueType::Any:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_Binary,
- org::apache::arrow::flatbuf::CreateBinary(*flatbufBuilder)
- .Union());
-
- case ESimpleLogicalValueType::Utf8:
- return std::tuple(
- org::apache::arrow::flatbuf::Type_Utf8,
- org::apache::arrow::flatbuf::CreateUtf8(*flatbufBuilder)
- .Union());
-
- // TODO(babenko): the following types are not supported:
- // Date
- // Datetime
- // Interval
- // Timestamp
-
- default:
- THROW_ERROR_EXCEPTION("Column %v has type %Qlv that is not currently supported by Arrow encoder",
- schema.GetDiagnosticNameString(),
- simpleType);
- }
-}
-
-bool IsRleButNotDictionaryEncodedStringLikeColumn(const TBatchColumn& column)
-{
- auto simpleType = CastToV1Type(column.Type).first;
- return
- IsStringLikeType(simpleType) &&
- column.Rle &&
- !column.Rle->ValueColumn->Dictionary;
-}
-
-bool IsRleAndDictionaryEncodedColumn(const TBatchColumn& column)
-{
- return
- column.Rle &&
- column.Rle->ValueColumn->Dictionary;
-}
-
-bool IsDictionaryEncodedColumn(const TBatchColumn& column)
-{
- return
- column.Dictionary ||
- IsRleAndDictionaryEncodedColumn(column) ||
- IsRleButNotDictionaryEncodedStringLikeColumn(column);
-}
-
-struct TTypedBatchColumn
-{
- const TBatchColumn* Column;
- TLogicalTypePtr Type;
-};
-
-struct TRecordBatchBodyPart
-{
- i64 Size;
- TBodyWriter Writer;
-};
-
-struct TRecordBatchSerializationContext final
-{
- explicit TRecordBatchSerializationContext(flatbuffers::FlatBufferBuilder* flatbufBuilder)
- : FlatbufBuilder(flatbufBuilder)
- { }
-
- void AddFieldNode(i64 length, i64 nullCount)
- {
- FieldNodes.emplace_back(length, nullCount);
- }
-
- void AddBuffer(i64 size, TBodyWriter writer)
- {
- YT_LOG_DEBUG("Buffer registered (Offset: %v, Size: %v)",
- CurrentBodyOffset,
- size);
-
- Buffers.emplace_back(CurrentBodyOffset, size);
- CurrentBodyOffset += AlignUp<i64>(size, ArrowAlignment);
- Parts.push_back(TRecordBatchBodyPart{size, std::move(writer)});
- }
-
- flatbuffers::FlatBufferBuilder* const FlatbufBuilder;
-
- i64 CurrentBodyOffset = 0;
- std::vector<org::apache::arrow::flatbuf::FieldNode> FieldNodes;
- std::vector<org::apache::arrow::flatbuf::Buffer> Buffers;
- std::vector<TRecordBatchBodyPart> Parts;
-};
-
-template <class T>
-TMutableRange<T> GetTypedValues(TMutableRef ref)
-{
- return TMutableRange(
- reinterpret_cast<T*>(ref.Begin()),
- reinterpret_cast<T*>(ref.End()));
-}
-
-void SerializeColumnPrologue(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- if (column->NullBitmap ||
- column->Rle && column->Rle->ValueColumn->NullBitmap)
- {
- if (column->Rle) {
- const auto* valueColumn = column->Rle->ValueColumn;
- auto rleIndexes = column->GetTypedValues<ui64>();
-
- context->AddFieldNode(
- column->ValueCount,
- CountOnesInRleBitmap(
- valueColumn->NullBitmap->Data,
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount));
-
- context->AddBuffer(
- GetBitmapByteSize(column->ValueCount),
- [=] (TMutableRef dstRef) {
- BuildValidityBitmapFromRleNullBitmap(
- valueColumn->NullBitmap->Data,
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- dstRef);
- });
- } else {
- context->AddFieldNode(
- column->ValueCount,
- CountOnesInBitmap(
- column->NullBitmap->Data,
- column->StartIndex,
- column->StartIndex + column->ValueCount));
-
- context->AddBuffer(
- GetBitmapByteSize(column->ValueCount),
- [=] (TMutableRef dstRef) {
- CopyBitmapRangeToBitmapNegated(
- column->NullBitmap->Data,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- dstRef);
- });
- }
- } else {
- context->AddFieldNode(
- column->ValueCount,
- 0);
-
- context->AddBuffer(
- 0,
- [=] (TMutableRef /*dstRef*/) { });
- }
-}
-
-void SerializeRleButNotDictionaryEncodedStringLikeColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Values->BitWidth == 64);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(!column->Values->ZigZagEncoded);
-
- YT_LOG_DEBUG("Adding RLE but not dictionary-encoded string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount);
-
- SerializeColumnPrologue(typedColumn, context);
-
- auto rleIndexes = column->GetTypedValues<ui64>();
-
- context->AddBuffer(
- sizeof (ui32) * column->ValueCount,
- [=] (TMutableRef dstRef) {
- BuildIotaDictionaryIndexesFromRleIndexes(
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- GetTypedValues<ui32>(dstRef));
- });
-}
-
-void SerializeDictionaryColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Dictionary->ZeroMeansNull);
- YT_VERIFY(column->Values->BitWidth == 32);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(!column->Values->ZigZagEncoded);
-
- YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- column->Rle.has_value());
-
- auto relevantDictionaryIndexes = column->GetRelevantTypedValues<ui32>();
-
- context->AddFieldNode(
- column->ValueCount,
- CountNullsInDictionaryIndexesWithZeroNull(relevantDictionaryIndexes));
-
- context->AddBuffer(
- GetBitmapByteSize(column->ValueCount),
- [=] (TMutableRef dstRef) {
- BuildValidityBitmapFromDictionaryIndexesWithZeroNull(
- relevantDictionaryIndexes,
- dstRef);
- });
-
- context->AddBuffer(
- sizeof (ui32) * column->ValueCount,
- [=] (TMutableRef dstRef) {
- BuildDictionaryIndexesFromDictionaryIndexesWithZeroNull(
- relevantDictionaryIndexes,
- GetTypedValues<ui32>(dstRef));
- });
-}
-
-void SerializeRleDictionaryColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Values->BitWidth == 64);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(!column->Values->ZigZagEncoded);
- YT_VERIFY(column->Rle->ValueColumn->Dictionary->ZeroMeansNull);
- YT_VERIFY(column->Rle->ValueColumn->Values->BitWidth == 32);
- YT_VERIFY(column->Rle->ValueColumn->Values->BaseValue == 0);
- YT_VERIFY(!column->Rle->ValueColumn->Values->ZigZagEncoded);
-
- YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- column->Rle.has_value());
-
- auto dictionaryIndexes = column->Rle->ValueColumn->GetTypedValues<ui32>();
- auto rleIndexes = column->GetTypedValues<ui64>();
-
- context->AddFieldNode(
- column->ValueCount,
- CountNullsInRleDictionaryIndexesWithZeroNull(
- dictionaryIndexes,
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount));
-
- context->AddBuffer(
- GetBitmapByteSize(column->ValueCount),
- [=] (TMutableRef dstRef) {
- BuildValidityBitmapFromRleDictionaryIndexesWithZeroNull(
- dictionaryIndexes,
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- dstRef);
- });
-
- context->AddBuffer(
- sizeof (ui32) * column->ValueCount,
- [=] (TMutableRef dstRef) {
- BuildDictionaryIndexesFromRleDictionaryIndexesWithZeroNull(
- dictionaryIndexes,
- rleIndexes,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- GetTypedValues<ui32>(dstRef));
- });
-}
-
-void SerializeIntegerColumn(
- const TTypedBatchColumn& typedColumn,
- ESimpleLogicalValueType simpleType,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
-
- YT_LOG_DEBUG("Adding integer column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- column->Rle.has_value());
-
- SerializeColumnPrologue(typedColumn, context);
-
- context->AddBuffer(
- column->ValueCount * GetIntegralTypeByteSize(simpleType),
- [=] (TMutableRef dstRef) {
- const auto* valueColumn = column->Rle
- ? column->Rle->ValueColumn
- : column;
- auto values = valueColumn->GetTypedValues<ui64>();
-
- auto rleIndexes = column->Rle
- ? column->GetTypedValues<ui64>()
- : TRange<ui64>();
-
- auto startIndex = column->StartIndex;
-
- switch (simpleType) {
- #define XX(cppType, ytType) \
- case ESimpleLogicalValueType::ytType: { \
- auto dstValues = GetTypedValues<cppType>(dstRef); \
- auto* currentOutput = dstValues.Begin(); \
- DecodeIntegerVector( \
- startIndex, \
- startIndex + column->ValueCount, \
- valueColumn->Values->BaseValue, \
- valueColumn->Values->ZigZagEncoded, \
- TRange<ui32>(), \
- rleIndexes, \
- [&] (auto index) { \
- return values[index]; \
- }, \
- [&] (auto value) { \
- *currentOutput++ = value; \
- }); \
- break; \
- }
-
- XX( i8, Int8)
- XX( i16, Int16)
- XX( i32, Int32)
- XX( i64, Int64)
- XX( ui8, Uint8)
- XX(ui16, Uint16)
- XX(ui32, Uint32)
- XX(ui64, Uint64)
-
- #undef XX
-
- default:
- THROW_ERROR_EXCEPTION("Integer column %v has unexpected type %Qlv",
- typedColumn.Column->Id,
- simpleType);
- }
- });
-}
-
-void SerializeDoubleColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Values->BitWidth == 64);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(!column->Values->ZigZagEncoded);
-
- YT_LOG_DEBUG(
- "Adding double column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- column->Rle.has_value());
-
- SerializeColumnPrologue(typedColumn, context);
-
- context->AddBuffer(
- column->ValueCount * sizeof(double),
- [=] (TMutableRef dstRef) {
- auto relevantValues = column->GetRelevantTypedValues<double>();
- ::memcpy(
- dstRef.Begin(),
- relevantValues.Begin(),
- column->ValueCount * sizeof(double));
- });
-}
-
-void SerializeFloatColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Values->BitWidth == 32);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(!column->Values->ZigZagEncoded);
-
- YT_LOG_DEBUG(
- "Adding float column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- column->Rle.has_value());
-
- SerializeColumnPrologue(typedColumn, context);
-
- context->AddBuffer(
- column->ValueCount * sizeof(float),
- [=] (TMutableRef dstRef) {
- auto relevantValues = column->GetRelevantTypedValues<float>();
- ::memcpy(
- dstRef.Begin(),
- relevantValues.Begin(),
- column->ValueCount * sizeof(float));
- });
-}
-
-void SerializeStringLikeColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(column->Values->BitWidth == 32);
- YT_VERIFY(column->Values->ZigZagEncoded);
- YT_VERIFY(column->Strings);
- YT_VERIFY(column->Strings->AvgLength);
- YT_VERIFY(!column->Rle);
-
- auto startIndex = column->StartIndex;
- auto endIndex = startIndex + column->ValueCount;
- auto stringData = column->Strings->Data;
- auto avgLength = *column->Strings->AvgLength;
-
- auto offsets = column->GetTypedValues<ui32>();
- auto startOffset = DecodeStringOffset(offsets, avgLength, startIndex);
- auto endOffset = DecodeStringOffset(offsets, avgLength, endIndex);
- auto stringsSize = endOffset - startOffset;
-
- YT_LOG_DEBUG("Adding string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v, StartOffset: %v, EndOffset: %v, StringsSize: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount,
- startOffset,
- endOffset,
- stringsSize);
-
- SerializeColumnPrologue(typedColumn, context);
-
- context->AddBuffer(
- sizeof(i32) * (column->ValueCount + 1),
- [=] (TMutableRef dstRef) {
- DecodeStringOffsets(
- offsets,
- avgLength,
- startIndex,
- endIndex,
- GetTypedValues<ui32>(dstRef));
- });
-
- context->AddBuffer(
- stringsSize,
- [=] (TMutableRef dstRef) {
- ::memcpy(
- dstRef.Begin(),
- stringData.Begin() + startOffset,
- stringsSize);
- });
-}
-
-void SerializeBooleanColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
- YT_VERIFY(column->Values);
- YT_VERIFY(!column->Values->ZigZagEncoded);
- YT_VERIFY(column->Values->BaseValue == 0);
- YT_VERIFY(column->Values->BitWidth == 1);
-
- YT_LOG_DEBUG("Adding boolean column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
- column->Id,
- column->StartIndex,
- column->ValueCount);
-
- SerializeColumnPrologue(typedColumn, context);
-
- context->AddBuffer(
- GetBitmapByteSize(column->ValueCount),
- [=] (TMutableRef dstRef) {
- CopyBitmapRangeToBitmap(
- column->Values->Data,
- column->StartIndex,
- column->StartIndex + column->ValueCount,
- dstRef);
- });
-}
-
-void SerializeNullColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- SerializeColumnPrologue(typedColumn, context);
-}
-
-void SerializeColumn(
- const TTypedBatchColumn& typedColumn,
- TRecordBatchSerializationContext* context)
-{
- const auto* column = typedColumn.Column;
-
- if (IsRleButNotDictionaryEncodedStringLikeColumn(*typedColumn.Column)) {
- SerializeRleButNotDictionaryEncodedStringLikeColumn(typedColumn, context);
- return;
- }
-
- if (column->Dictionary) {
- SerializeDictionaryColumn(typedColumn, context);
- return;
- }
-
- if (column->Rle && column->Rle->ValueColumn->Dictionary) {
- SerializeRleDictionaryColumn(typedColumn, context);
- return;
- }
-
- auto simpleType = CastToV1Type(typedColumn.Type).first;
- if (IsIntegralType(simpleType)) {
- SerializeIntegerColumn(typedColumn, simpleType, context);
- } else if (simpleType == ESimpleLogicalValueType::Double) {
- SerializeDoubleColumn(typedColumn, context);
- } else if (simpleType == ESimpleLogicalValueType::Float) {
- SerializeFloatColumn(typedColumn, context);
- } else if (IsStringLikeType(simpleType)) {
- SerializeStringLikeColumn(typedColumn, context);
- } else if (simpleType == ESimpleLogicalValueType::Boolean) {
- SerializeBooleanColumn(typedColumn, context);
- } else if (simpleType == ESimpleLogicalValueType::Null) {
- SerializeNullColumn(typedColumn, context);
- } else if (simpleType == ESimpleLogicalValueType::Void) {
- SerializeNullColumn(typedColumn, context);
- } else {
- THROW_ERROR_EXCEPTION("Column %v has unexpected type %Qlv",
- typedColumn.Column->Id,
- simpleType);
- }
-}
-
-auto SerializeRecordBatch(
- flatbuffers::FlatBufferBuilder* flatbufBuilder,
- int length,
- TRange<TTypedBatchColumn> typedColumns)
-{
- auto context = New<TRecordBatchSerializationContext>(flatbufBuilder);
-
- for (const auto& typedColumn : typedColumns) {
- SerializeColumn(typedColumn, context.Get());
- }
-
- auto fieldNodesOffset = flatbufBuilder->CreateVectorOfStructs(context->FieldNodes);
-
- auto buffersOffset = flatbufBuilder->CreateVectorOfStructs(context->Buffers);
-
- auto recordBatchOffset = org::apache::arrow::flatbuf::CreateRecordBatch(
- *flatbufBuilder,
- length,
- fieldNodesOffset,
- buffersOffset);
-
- auto totalSize = context->CurrentBodyOffset;
-
- return std::tuple(
- recordBatchOffset,
- totalSize,
- [context = std::move(context)] (TMutableRef dstRef) {
- char* current = dstRef.Begin();
- for (const auto& part : context->Parts) {
- part.Writer(TMutableRef(current, current + part.Size));
- current += AlignUp<i64>(part.Size, ArrowAlignment);
- }
- YT_VERIFY(current == dstRef.End());
- });
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
DECLARE_REFCOUNTED_CLASS(TArrowRowStreamEncoder)
class TArrowRowStreamEncoder
@@ -685,460 +39,41 @@ class TArrowRowStreamEncoder
public:
TArrowRowStreamEncoder(
TTableSchemaPtr schema,
+ std::optional<std::vector<std::string>> columns,
TNameTablePtr nameTable,
IRowStreamEncoderPtr fallbackEncoder,
NFormats::TControlAttributesConfigPtr controlAttributesConfig)
: Schema_(std::move(schema))
+ , Columns_(std::move(columns))
, NameTable_(std::move(nameTable))
, FallbackEncoder_(std::move(fallbackEncoder))
, ControlAttributesConfig_(controlAttributesConfig)
+ , OutputStream_(Data_)
+ , AsyncOutputStream_(NConcurrency::CreateAsyncAdapter(&OutputStream_))
{
- if (ControlAttributesConfig_->EnableRowIndex) {
- RowIndexId_ = NameTable_->GetIdOrRegisterName(RowIndexColumnName);
- }
-
- if (ControlAttributesConfig_->EnableRangeIndex) {
- RangeIndexId_ = NameTable_->GetIdOrRegisterName(RangeIndexColumnName);
- }
-
- if (ControlAttributesConfig_->EnableTableIndex) {
- TableIndexId_ = NameTable_->GetIdOrRegisterName(TableIndexColumnName);
- }
-
- if (ControlAttributesConfig_->EnableTabletIndex) {
- TabletIndexId_ = NameTable_->GetIdOrRegisterName(TabletIndexColumnName);
- }
-
YT_LOG_DEBUG("Row stream encoder created (Schema: %v)",
*Schema_);
}
- const TTableSchemaPtr& GetSchema()
- {
- return Schema_;
- }
-
- const TNameTablePtr& GetNameTable()
- {
- return NameTable_;
- }
-
- bool IsFirstBatch()
- {
- return FirstBatch_;
- }
-
- std::vector<IUnversionedColumnarRowBatch::TDictionaryId>& ArrowDictionaryIds()
- {
- return ArrowDictionaryIds_;
- }
-
- bool IsTableIndexColumnId(int id) const
- {
- return id == TableIndexId_;
- }
-
- bool IsRowIndexColumnId(int id) const
- {
- return id == RowIndexId_;
- }
-
- bool IsRangeIndexColumnId(int id) const
- {
- return id == RangeIndexId_;
- }
-
- bool IsTabletIndexColumnId(int id) const
- {
- return id == TabletIndexId_;
- }
-
- bool IsSystemColumnId(int id) const
- {
- return IsTableIndexColumnId(id) ||
- IsRangeIndexColumnId(id) ||
- IsRowIndexColumnId(id) ||
- IsTabletIndexColumnId(id);
- }
-
- bool IsSystemColumnEnable(int columnIndex)
- {
- return ControlAttributesConfig_->EnableTableIndex && IsTableIndexColumnId(columnIndex) ||
- ControlAttributesConfig_->EnableRangeIndex && IsRangeIndexColumnId(columnIndex) ||
- ControlAttributesConfig_->EnableRowIndex && IsRowIndexColumnId(columnIndex) ||
- ControlAttributesConfig_->EnableTabletIndex && IsTabletIndexColumnId(columnIndex);
- }
-
TSharedRef Encode(
const IUnversionedRowBatchPtr& batch,
const NApi::NRpcProxy::NProto::TRowsetStatistics* statistics) override;
private:
const TTableSchemaPtr Schema_;
+ const std::optional<std::vector<std::string>> Columns_;
const TNameTablePtr NameTable_;
const IRowStreamEncoderPtr FallbackEncoder_;
const NFormats::TControlAttributesConfigPtr ControlAttributesConfig_;
- int RowIndexId_ = -1;
- int RangeIndexId_ = -1;
- int TableIndexId_ = -1;
- int TabletIndexId_ = -1;
-
- bool FirstBatch_ = true;
- std::vector<IUnversionedColumnarRowBatch::TDictionaryId> ArrowDictionaryIds_;
+ NFormats::ISchemalessFormatWriterPtr Writer_ = nullptr;
+ TString Data_;
+ TStringOutput OutputStream_;
+ NConcurrency::IFlushableAsyncOutputStreamPtr AsyncOutputStream_;
};
DEFINE_REFCOUNTED_TYPE(TArrowRowStreamEncoder)
-////////////////////////////////////////////////////////////////////////////////
-
-class TArrowRowStreamBlockEncoder
-{
-public:
- TArrowRowStreamBlockEncoder(
- TArrowRowStreamEncoderPtr streamEncoder,
- IUnversionedColumnarRowBatchPtr batch)
- : StreamEncoder_(std::move(streamEncoder))
- , Batch_(std::move(batch))
- {
- PrepareColumns();
- if (IsSchemaMessageNeeded()) {
- if (!StreamEncoder_->IsFirstBatch()) {
- RegisterEosMarker();
- }
- ResetArrowDictionaries();
- PrepareSchema();
- }
- PrepareDictionaryBatches();
- PrepareRecordBatch();
- }
-
- i64 GetPayloadSize() const
- {
- i64 size = 0;
- for (const auto& message : Messages_) {
- size += sizeof (ui32); // continuation indicator
- size += sizeof (ui32); // metadata size
- if (message.FlatbufBuilder) {
- size += AlignUp<i64>(message.FlatbufBuilder->GetSize(), ArrowAlignment); // metadata message
- size += AlignUp<i64>(message.BodySize, ArrowAlignment); // body
- }
- }
- return size;
- }
-
- void WritePayload(TMutableRef payloadRef)
- {
- YT_LOG_DEBUG("Started writing payload (Size: %v)",
- payloadRef.Size());
- char* current = payloadRef.Begin();
- for (const auto& message : Messages_) {
- // Continuation indicator
- *reinterpret_cast<ui32*>(current) = 0xFFFFFFFF;
- current += sizeof(ui32);
-
- if (message.FlatbufBuilder) {
- auto metadataSize = message.FlatbufBuilder->GetSize();
- auto* metadataPtr = message.FlatbufBuilder->GetBufferPointer();
-
- // Metadata size
- *reinterpret_cast<ui32*>(current) = AlignUp<i64>(metadataSize, ArrowAlignment);
- current += sizeof(ui32);
-
- // Metadata message
- ::memcpy(current, metadataPtr, metadataSize);
- current += AlignUp<i64>(metadataSize, ArrowAlignment);
-
- // Body
- if (message.BodyWriter) {
- message.BodyWriter(TMutableRef(current, current + message.BodySize));
- current += AlignUp<i64>(message.BodySize, ArrowAlignment);
- } else {
- YT_VERIFY(message.BodySize == 0);
- }
- } else {
- // EOS marker
- *reinterpret_cast<ui32*>(current) = 0;
- current += sizeof(ui32);
- }
- }
- YT_VERIFY(current == payloadRef.End());
- YT_LOG_DEBUG("Finished writing payload");
- }
-
-private:
- const TArrowRowStreamEncoderPtr StreamEncoder_;
- const IUnversionedColumnarRowBatchPtr Batch_;
-
- std::vector<TTypedBatchColumn> TypedColumns_;
-
- struct TMessage
- {
- std::optional<flatbuffers::FlatBufferBuilder> FlatbufBuilder;
- i64 BodySize;
- TBodyWriter BodyWriter;
- };
-
- std::vector<TMessage> Messages_;
-
- void RegisterEosMarker()
- {
- YT_LOG_DEBUG("EOS marker registered");
-
- Messages_.push_back(TMessage{
- std::nullopt,
- 0,
- TBodyWriter()
- });
- }
-
- void RegisterMessage(
- org::apache::arrow::flatbuf::MessageHeader type,
- flatbuffers::FlatBufferBuilder&& flatbufBuilder,
- i64 bodySize = 0,
- std::function<void(TMutableRef)> bodyWriter = nullptr)
- {
- YT_LOG_DEBUG("Message registered (Type: %v, MessageSize: %v, BodySize: %v)",
- org::apache::arrow::flatbuf::EnumNamesMessageHeader()[type],
- flatbufBuilder.GetSize(),
- bodySize);
-
- YT_VERIFY((bodySize % ArrowAlignment) == 0);
- Messages_.push_back(TMessage{
- std::move(flatbufBuilder),
- bodySize,
- std::move(bodyWriter)
- });
- }
-
- std::optional<TColumnSchema> FindColumnSchema(const TBatchColumn& column)
- {
- YT_VERIFY(column.Id >= 0);
- auto name = StreamEncoder_->GetNameTable()->GetName(column.Id);
- auto columnSchemaPtr = StreamEncoder_->GetSchema()->FindColumn(name);
- if (!columnSchemaPtr) {
- if (StreamEncoder_->IsSystemColumnId(column.Id) && StreamEncoder_->IsSystemColumnEnable(column.Id)) {
- return TColumnSchema(TString(name), EValueType::Int64);
- }
- return std::nullopt;
- }
- return *columnSchemaPtr;
- }
-
- void PrepareColumns()
- {
- auto batchColumns = Batch_->MaterializeColumns();
- TypedColumns_.reserve(batchColumns.Size());
- for (const auto* column : batchColumns) {
- // Ignoring null schema column and not enabled system columns.
- if (auto columnSchema = FindColumnSchema(*column)) {
- TypedColumns_.push_back(TTypedBatchColumn{
- column,
- columnSchema->LogicalType()
- });
- }
- }
- }
-
- bool IsSchemaMessageNeeded()
- {
- if (StreamEncoder_->IsFirstBatch()) {
- return true;
- }
-
- YT_VERIFY(StreamEncoder_->ArrowDictionaryIds().size() == TypedColumns_.size());
-
- bool result = StreamEncoder_->IsFirstBatch();
- for (int index = 0; index < std::ssize(TypedColumns_); ++index) {
- bool currentDictionary = IsDictionaryEncodedColumn(*TypedColumns_[index].Column);
- bool previousDictionary = StreamEncoder_->ArrowDictionaryIds()[index] != IUnversionedColumnarRowBatch::NullDictionaryId;
- if (currentDictionary != previousDictionary) {
- result = true;
- }
- }
- return result;
- }
-
- void ResetArrowDictionaries()
- {
- StreamEncoder_->ArrowDictionaryIds().assign(TypedColumns_.size(), IUnversionedColumnarRowBatch::NullDictionaryId);
- }
-
-
- void PrepareSchema()
- {
- flatbuffers::FlatBufferBuilder flatbufBuilder;
-
- int arrowDictionaryIdCounter = 0;
- std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> fieldOffsets;
- fieldOffsets.reserve(TypedColumns_.size());
-
- for (const auto& typedColumn : TypedColumns_) {
- auto optionalColumnSchema = FindColumnSchema(*typedColumn.Column);
- YT_VERIFY(optionalColumnSchema != std::nullopt);
- auto columnSchema = *optionalColumnSchema;
-
- auto nameOffset = SerializeString(&flatbufBuilder, columnSchema.Name());
-
- auto [typeType, typeOffset] = SerializeColumnType(&flatbufBuilder, columnSchema);
-
- flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> dictionaryEncodingOffset;
-
- auto indexTypeOffset = org::apache::arrow::flatbuf::CreateInt(flatbufBuilder, 32, false);
-
- if (IsDictionaryEncodedColumn(*typedColumn.Column)) {
- dictionaryEncodingOffset = org::apache::arrow::flatbuf::CreateDictionaryEncoding(
- flatbufBuilder,
- arrowDictionaryIdCounter++,
- indexTypeOffset);
- }
-
- auto fieldOffset = org::apache::arrow::flatbuf::CreateField(
- flatbufBuilder,
- nameOffset,
- columnSchema.LogicalType()->IsNullable(),
- typeType,
- typeOffset,
- dictionaryEncodingOffset);
-
- fieldOffsets.push_back(fieldOffset);
- }
-
- auto fieldsOffset = flatbufBuilder.CreateVector(fieldOffsets);
-
- auto schemaOffset = org::apache::arrow::flatbuf::CreateSchema(
- flatbufBuilder,
- org::apache::arrow::flatbuf::Endianness_Little,
- fieldsOffset);
-
- auto messageOffset = org::apache::arrow::flatbuf::CreateMessage(
- flatbufBuilder,
- org::apache::arrow::flatbuf::MetadataVersion_V4,
- org::apache::arrow::flatbuf::MessageHeader_Schema,
- schemaOffset.Union(),
- 0);
-
- flatbufBuilder.Finish(messageOffset);
-
- RegisterMessage(
- org::apache::arrow::flatbuf::MessageHeader_Schema,
- std::move(flatbufBuilder));
- }
-
- void PrepareDictionaryBatches()
- {
- int arrowDictionaryIdCounter = 0;
- auto prepareDictionaryBatch = [&] (
- int columnIndex,
- IUnversionedColumnarRowBatch::TDictionaryId ytDictionaryId,
- const TBatchColumn* dictionaryColumn)
- {
- int arrowDictionaryId = arrowDictionaryIdCounter++;
- const auto& typedColumn = TypedColumns_[columnIndex];
- auto previousYTDictionaryId = StreamEncoder_->ArrowDictionaryIds()[columnIndex];
- if (ytDictionaryId == previousYTDictionaryId) {
- YT_LOG_DEBUG("Reusing previous dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
- typedColumn.Column->Id,
- ytDictionaryId,
- arrowDictionaryId);
- } else {
- YT_LOG_DEBUG("Sending new dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
- typedColumn.Column->Id,
- ytDictionaryId,
- arrowDictionaryId);
- PrepareDictionaryBatch(
- TTypedBatchColumn{dictionaryColumn, typedColumn.Type},
- arrowDictionaryId);
- StreamEncoder_->ArrowDictionaryIds()[columnIndex] = ytDictionaryId;
- }
- };
-
- for (int columnIndex = 0; columnIndex < std::ssize(TypedColumns_); ++columnIndex) {
- const auto& typedColumn = TypedColumns_[columnIndex];
- if (typedColumn.Column->Dictionary) {
- YT_LOG_DEBUG("Adding dictionary batch for dictionary-encoded column (ColumnId: %v)",
- typedColumn.Column->Id);
- prepareDictionaryBatch(
- columnIndex,
- typedColumn.Column->Dictionary->DictionaryId,
- typedColumn.Column->Dictionary->ValueColumn);
- } else if (IsRleButNotDictionaryEncodedStringLikeColumn(*typedColumn.Column)) {
- YT_LOG_DEBUG("Adding dictionary batch for RLE but not dictionary-encoded string-like column (ColumnId: %v)",
- typedColumn.Column->Id);
- prepareDictionaryBatch(
- columnIndex,
- IUnversionedColumnarRowBatch::GenerateDictionaryId(), // any unique one will do
- typedColumn.Column->Rle->ValueColumn);
- } else if (IsRleAndDictionaryEncodedColumn(*typedColumn.Column)) {
- YT_LOG_DEBUG("Adding dictionary batch for RLE and dictionary-encoded column (ColumnId: %v)",
- typedColumn.Column->Id);
- prepareDictionaryBatch(
- columnIndex,
- typedColumn.Column->Rle->ValueColumn->Dictionary->DictionaryId,
- typedColumn.Column->Rle->ValueColumn->Dictionary->ValueColumn);
- }
- }
- }
-
- void PrepareDictionaryBatch(
- const TTypedBatchColumn& typedColumn,
- int arrowDictionaryId)
- {
- flatbuffers::FlatBufferBuilder flatbufBuilder;
-
- auto [recordBatchOffset, bodySize, bodyWriter] = SerializeRecordBatch(
- &flatbufBuilder,
- typedColumn.Column->ValueCount,
- TRange({typedColumn}));
-
- auto dictionaryBatchOffset = org::apache::arrow::flatbuf::CreateDictionaryBatch(
- flatbufBuilder,
- arrowDictionaryId,
- recordBatchOffset);
-
- auto messageOffset = org::apache::arrow::flatbuf::CreateMessage(
- flatbufBuilder,
- org::apache::arrow::flatbuf::MetadataVersion_V4,
- org::apache::arrow::flatbuf::MessageHeader_DictionaryBatch,
- dictionaryBatchOffset.Union(),
- bodySize);
-
- flatbufBuilder.Finish(messageOffset);
-
- RegisterMessage(
- org::apache::arrow::flatbuf::MessageHeader_DictionaryBatch,
- std::move(flatbufBuilder),
- bodySize,
- std::move(bodyWriter));
- }
-
- void PrepareRecordBatch()
- {
- flatbuffers::FlatBufferBuilder flatbufBuilder;
-
- auto [recordBatchOffset, bodySize, bodyWriter] = SerializeRecordBatch(
- &flatbufBuilder,
- Batch_->GetRowCount(),
- TypedColumns_);
-
- auto messageOffset = org::apache::arrow::flatbuf::CreateMessage(
- flatbufBuilder,
- org::apache::arrow::flatbuf::MetadataVersion_V4,
- org::apache::arrow::flatbuf::MessageHeader_RecordBatch,
- recordBatchOffset.Union(),
- bodySize);
-
- flatbufBuilder.Finish(messageOffset);
-
- RegisterMessage(
- org::apache::arrow::flatbuf::MessageHeader_RecordBatch,
- std::move(flatbufBuilder),
- bodySize,
- std::move(bodyWriter));
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
TSharedRef TArrowRowStreamEncoder::Encode(
const IUnversionedRowBatchPtr& batch,
@@ -1157,16 +92,31 @@ TSharedRef TArrowRowStreamEncoder::Encode(
descriptor.set_rowset_kind(NApi::NRpcProxy::NProto::RK_UNVERSIONED);
descriptor.set_rowset_format(NApi::NRpcProxy::NProto::RF_ARROW);
- TArrowRowStreamBlockEncoder blockEncoder(this, std::move(columnarBatch));
+ if (!Writer_) {
+ // The writer is created lazily to avoid unnecessary errors in the constructor when using fallbackEncoder
+ Writer_ = CreateStaticTableWriterForFormat(
+ NFormats::EFormatType::Arrow,
+ NameTable_,
+ {Schema_},
+ {Columns_},
+ AsyncOutputStream_,
+ /*enableContextSaving*/ false,
+ ControlAttributesConfig_,
+ /*keyColumnCount*/ 0);
+ }
+ Data_.clear();
+ Writer_->WriteBatch(batch);
+ NConcurrency::WaitFor(Writer_->Flush())
+ .ThrowOnError();
+
+ auto rowRefs = TSharedRef::FromString(Data_);
auto [block, payloadRef] = SerializeRowStreamBlockEnvelope(
- blockEncoder.GetPayloadSize(),
+ rowRefs.Size(),
descriptor,
statistics);
- blockEncoder.WritePayload(payloadRef);
-
- FirstBatch_ = false;
+ MergeRefsToRef(std::vector<TSharedRef>{rowRefs}, payloadRef);
return block;
}
@@ -1177,12 +127,14 @@ TSharedRef TArrowRowStreamEncoder::Encode(
IRowStreamEncoderPtr CreateArrowRowStreamEncoder(
TTableSchemaPtr schema,
+ std::optional<std::vector<std::string>> columns,
TNameTablePtr nameTable,
IRowStreamEncoderPtr fallbackEncoder,
NFormats::TControlAttributesConfigPtr controlAttributesConfig)
{
return New<TArrowRowStreamEncoder>(
std::move(schema),
+ std::move(columns),
std::move(nameTable),
std::move(fallbackEncoder),
std::move(controlAttributesConfig));
diff --git a/yt/yt/client/arrow/arrow_row_stream_encoder.h b/yt/yt/client/arrow/arrow_row_stream_encoder.h
index 792b647d18..be20a949c4 100644
--- a/yt/yt/client/arrow/arrow_row_stream_encoder.h
+++ b/yt/yt/client/arrow/arrow_row_stream_encoder.h
@@ -14,6 +14,7 @@ namespace NYT::NArrow {
NApi::NRpcProxy::IRowStreamEncoderPtr CreateArrowRowStreamEncoder(
NTableClient::TTableSchemaPtr schema,
+ std::optional<std::vector<std::string>> columns,
NTableClient::TNameTablePtr nameTable,
NApi::NRpcProxy::IRowStreamEncoderPtr fallbackEncoder,
NFormats::TControlAttributesConfigPtr controlAttributesConfig);
diff --git a/yt/yt/client/arrow/ya.make b/yt/yt/client/arrow/ya.make
index 40d27d8e07..097479ffdf 100644
--- a/yt/yt/client/arrow/ya.make
+++ b/yt/yt/client/arrow/ya.make
@@ -10,7 +10,7 @@ SRCS(
PEERDIR(
yt/yt/client
- yt/yt/client/arrow/fbs
+ yt/yt/library/formats
)
END()
diff --git a/yt/yt/client/driver/proxy_discovery_cache.cpp b/yt/yt/client/driver/proxy_discovery_cache.cpp
index a17894a93e..a9612bb359 100644
--- a/yt/yt/client/driver/proxy_discovery_cache.cpp
+++ b/yt/yt/client/driver/proxy_discovery_cache.cpp
@@ -70,6 +70,8 @@ public:
private:
const IClientPtr Client_;
+ const NLogging::TLogger Logger = DriverLogger();
+
TFuture<TProxyDiscoveryResponse> DoGet(
const TProxyDiscoveryRequest& request,
bool /*isPeriodicUpdate*/) noexcept override
@@ -93,7 +95,13 @@ private:
options.ReadFrom = EMasterChannelKind::LocalCache;
options.Attributes = {BalancersAttributeName};
- auto path = GetProxyRegistryPath(request.Type) + "/@";
+ TYPath path;
+ try {
+ path = GetProxyRegistryPath(request.Type) + "/@";
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR(ex, "Failed to get proxy registry path");
+ return MakeFuture<std::optional<TProxyDiscoveryResponse>>(ex);
+ }
return Client_->GetNode(path, options).Apply(
BIND([=] (const TYsonString& yson) -> std::optional<TProxyDiscoveryResponse> {
auto attributes = ConvertTo<IMapNodePtr>(yson);
@@ -120,7 +128,13 @@ private:
options.SuppressTransactionCoordinatorSync = true;
options.Attributes = {BannedAttributeName, RoleAttributeName, AddressesAttributeName};
- auto path = GetProxyRegistryPath(request.Type);
+ TYPath path;
+ try {
+ path = GetProxyRegistryPath(request.Type);
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR(ex, "Failed to get proxy registry path");
+ return MakeFuture<TProxyDiscoveryResponse>(ex);
+ }
return Client_->GetNode(path, options).Apply(BIND([=] (const TYsonString& yson) {
TProxyDiscoveryResponse response;
diff --git a/yt/yt/client/signature/public.h b/yt/yt/client/signature/public.h
new file mode 100644
index 0000000000..012d19cfda
--- /dev/null
+++ b/yt/yt/client/signature/public.h
@@ -0,0 +1,11 @@
+#include <library/cpp/yt/memory/ref_counted.h>
+
+namespace NYT::NSignature {
+
+///////////////////////////////////////////////////////////////////////////////
+
+DECLARE_REFCOUNTED_CLASS(TSignature)
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NSignature
diff --git a/yt/yt/client/signature/signature.cpp b/yt/yt/client/signature/signature.cpp
new file mode 100644
index 0000000000..4c071c1407
--- /dev/null
+++ b/yt/yt/client/signature/signature.cpp
@@ -0,0 +1,64 @@
+#include "signature.h"
+
+#include <yt/yt/core/yson/consumer.h>
+
+#include <yt/yt/core/ytree/fluent.h>
+#include <yt/yt/core/ytree/convert.h>
+
+namespace NYT::NSignature {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYson;
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSignature::TSignature(NYson::TYsonString payload)
+ : Payload_(std::move(payload))
+{ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TYsonString& TSignature::Payload() const
+{
+ return Payload_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Serialize(const TSignature& signature, IYsonConsumer* consumer)
+{
+ consumer->OnBeginMap();
+ BuildYsonMapFragmentFluently(consumer)
+ .Item("header").Value(signature.Header_.ToString())
+ .Item("payload").Value(signature.Payload_.ToString())
+ .Item("signature").Value(TString(
+ reinterpret_cast<const char*>(signature.Signature_.data()),
+ signature.Signature_.size()));
+ consumer->OnEndMap();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Deserialize(TSignature& signature, INodePtr node)
+{
+ auto mapNode = node->AsMap();
+ signature.Header_ = TYsonString(mapNode->GetChildValueOrThrow<TString>("header"));
+ signature.Payload_ = TYsonString(mapNode->GetChildValueOrThrow<TString>("payload"));
+
+ auto signatureString = mapNode->GetChildValueOrThrow<TString>("signature");
+ auto signatureBytes = std::as_bytes(std::span(TStringBuf(signatureString)));
+ signature.Signature_.resize(signatureBytes.size());
+
+ std::copy(signatureBytes.begin(), signatureBytes.end(), signature.Signature_.begin());
+}
+
+void Deserialize(TSignature& signature, TYsonPullParserCursor* cursor)
+{
+ Deserialize(signature, ExtractTo<INodePtr>(cursor));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NSignature
diff --git a/yt/yt/client/signature/signature.h b/yt/yt/client/signature/signature.h
new file mode 100644
index 0000000000..f6602994ce
--- /dev/null
+++ b/yt/yt/client/signature/signature.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "public.h"
+
+#include <yt/yt/core/yson/string.h>
+
+#include <yt/yt/core/ytree/public.h>
+
+#include <vector>
+
+namespace NYT::NSignature {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSignature final
+{
+public:
+ // NB(pavook) only needed for Deserialize internals.
+
+ //! Constructs an empty TSignature.
+ TSignature() = default;
+
+ //! Creates a TSignature containing the given payload without an actual signature.
+ explicit TSignature(NYson::TYsonString payload);
+
+ [[nodiscard]] const NYson::TYsonString& Payload() const;
+
+private:
+ NYson::TYsonString Header_;
+ NYson::TYsonString Payload_;
+ std::vector<std::byte> Signature_;
+
+ friend class TSignatureGenerator;
+ friend class TSignatureValidator;
+
+ friend void Serialize(const TSignature& signature, NYson::IYsonConsumer* consumer);
+ friend void Deserialize(TSignature& signature, NYTree::INodePtr node);
+ friend void Deserialize(TSignature& signature, NYson::TYsonPullParserCursor* cursor);
+};
+
+DEFINE_REFCOUNTED_TYPE(TSignature)
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NSignature
diff --git a/yt/yt/client/signature/unittests/signature_ut.cpp b/yt/yt/client/signature/unittests/signature_ut.cpp
new file mode 100644
index 0000000000..75ae1733fa
--- /dev/null
+++ b/yt/yt/client/signature/unittests/signature_ut.cpp
@@ -0,0 +1,54 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/client/signature/signature.h>
+
+#include <yt/yt/core/yson/string.h>
+
+#include <yt/yt/core/ytree/convert.h>
+
+namespace NYT::NSignature {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYson;
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TSignatureTest, PayloadConstruct)
+{
+ TSignature signature(TYsonString("payload"_sb));
+ EXPECT_EQ(signature.Payload().ToString(), "payload");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TSignatureTest, DeserializeSerialize)
+{
+ // SignatureSize bytes.
+ TYsonString ysonOK(R"({"header"="header";"payload"="payload";"signature"="signature";})"_sb);
+
+ TSignaturePtr signature;
+ EXPECT_NO_THROW(signature = ConvertTo<TSignaturePtr>(ysonOK));
+ EXPECT_EQ(signature->Payload().ToString(), "payload");
+
+ EXPECT_EQ(ConvertToYsonString(signature, EYsonFormat::Text).ToString(), ysonOK.ToString());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TSignatureTest, DeserializeFail)
+{
+ {
+ TYsonString ysonFail(
+ R"({"header"="header";"buddy"="payload";"signature"="abacaba";})"_sb
+ );
+ EXPECT_THROW_WITH_SUBSTRING(ConvertTo<TSignaturePtr>(ysonFail), "no child with key \"payload\"");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NSignature
diff --git a/yt/yt/library/oom/unittests/ya.make b/yt/yt/client/signature/unittests/ya.make
index 23392352b9..b7f6fd5fe5 100644
--- a/yt/yt/library/oom/unittests/ya.make
+++ b/yt/yt/client/signature/unittests/ya.make
@@ -1,17 +1,18 @@
-GTEST()
+GTEST(unittester-client-signature)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(TCMALLOC)
-
SRCS(
- oom_ut.cpp
+ signature_ut.cpp
)
INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
PEERDIR(
- yt/yt/library/oom
+ yt/yt/core/test_framework
+ yt/yt/client
)
+SIZE(SMALL)
+
END()
diff --git a/yt/yt/client/ya.make b/yt/yt/client/ya.make
index 2b384fe818..9d54b9ab4c 100644
--- a/yt/yt/client/ya.make
+++ b/yt/yt/client/ya.make
@@ -101,6 +101,8 @@ SRCS(
security_client/public.cpp
security_client/helpers.cpp
+ signature/signature.cpp
+
table_client/public.cpp
table_client/adapters.cpp
table_client/table_output.cpp
@@ -237,6 +239,7 @@ RECURSE(
RECURSE_FOR_TESTS(
api/unittests
+ signature/unittests
table_client/unittests
unittests
)
diff --git a/yt/yt/core/bus/tcp/config.h b/yt/yt/core/bus/tcp/config.h
index 22d1bd6cd7..914035e2d9 100644
--- a/yt/yt/core/bus/tcp/config.h
+++ b/yt/yt/core/bus/tcp/config.h
@@ -47,11 +47,11 @@ public:
TEnumIndexedArray<EMultiplexingBand, TMultiplexingBandConfigPtr> MultiplexingBands;
- TTcpDispatcherConfigPtr ApplyDynamic(const TTcpDispatcherDynamicConfigPtr& dynamicConfig) const;
-
//! Used to store TLS/SSL certificate files.
std::optional<TString> BusCertsDirectoryPath;
+ TTcpDispatcherConfigPtr ApplyDynamic(const TTcpDispatcherDynamicConfigPtr& dynamicConfig) const;
+
REGISTER_YSON_STRUCT(TTcpDispatcherConfig);
static void Register(TRegistrar registrar);
@@ -78,6 +78,8 @@ public:
//! Used to store TLS/SSL certificate files.
std::optional<TString> BusCertsDirectoryPath;
+ static void Setup(auto&& registrar);
+
REGISTER_YSON_STRUCT(TTcpDispatcherDynamicConfig);
static void Register(TRegistrar registrar);
diff --git a/yt/yt/core/bus/tcp/configure_dispatcher.cpp b/yt/yt/core/bus/tcp/configure_dispatcher.cpp
new file mode 100644
index 0000000000..75fdd662a5
--- /dev/null
+++ b/yt/yt/core/bus/tcp/configure_dispatcher.cpp
@@ -0,0 +1,41 @@
+#include "dispatcher.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NBus {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TTcpDispatcherConfigPtr>& staticParameter)
+{
+ staticParameter.DefaultNew(); // Static "tcp_dispatcher" section.
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TTcpDispatcherDynamicConfigPtr>& dynamicParameter)
+{
+ dynamicParameter.DefaultNew(); // Dynamic "tcp_dispatcher" section.
+}
+
+void ConfigureSingleton(const TTcpDispatcherConfigPtr& staticConfig)
+{
+ TTcpDispatcher::Get()->Configure(staticConfig); // Initial configuration: static part only.
+}
+
+void ReconfigureSingleton(
+ const TTcpDispatcherConfigPtr& staticConfig,
+ const TTcpDispatcherDynamicConfigPtr& dynamicConfig)
+{
+ TTcpDispatcher::Get()->Configure(staticConfig->ApplyDynamic(dynamicConfig)); // Reconfigure with the result of ApplyDynamic().
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "tcp_dispatcher",
+ TTcpDispatcherConfig,
+ TTcpDispatcherDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NBus
diff --git a/yt/yt/core/bus/tcp/dispatcher.cpp b/yt/yt/core/bus/tcp/dispatcher.cpp
index 045f3d231d..4b5c7b64f2 100644
--- a/yt/yt/core/bus/tcp/dispatcher.cpp
+++ b/yt/yt/core/bus/tcp/dispatcher.cpp
@@ -1,4 +1,5 @@
#include "dispatcher.h"
+
#include "dispatcher_impl.h"
#include <yt/yt/core/bus/private.h>
diff --git a/yt/yt/core/bus/tcp/public.h b/yt/yt/core/bus/tcp/public.h
index 5e7c54af91..0c86109c1a 100644
--- a/yt/yt/core/bus/tcp/public.h
+++ b/yt/yt/core/bus/tcp/public.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
#include <yt/yt/core/bus/public.h>
namespace NYT::NBus {
@@ -18,6 +20,8 @@ DECLARE_REFCOUNTED_CLASS(TBusClientConfig)
struct IPacketTranscoderFactory;
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TTcpDispatcherConfig, TTcpDispatcherDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NBus
diff --git a/yt/yt/core/concurrency/configure_fiber_manager.cpp b/yt/yt/core/concurrency/configure_fiber_manager.cpp
new file mode 100644
index 0000000000..36168664fc
--- /dev/null
+++ b/yt/yt/core/concurrency/configure_fiber_manager.cpp
@@ -0,0 +1,41 @@
+#include "fiber_manager.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NConcurrency {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TFiberManagerConfigPtr>& staticParameter)
+{
+ staticParameter.DefaultNew(); // Static "fiber_manager" section.
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TFiberManagerDynamicConfigPtr>& dynamicParameter)
+{
+ dynamicParameter.DefaultNew(); // Dynamic "fiber_manager" section.
+}
+
+void ConfigureSingleton(const TFiberManagerConfigPtr& staticConfig)
+{
+ TFiberManager::Configure(staticConfig); // Initial configuration: static part only.
+}
+
+void ReconfigureSingleton(
+ const TFiberManagerConfigPtr& staticConfig,
+ const TFiberManagerDynamicConfigPtr& dynamicConfig)
+{
+ TFiberManager::Configure(staticConfig->ApplyDynamic(dynamicConfig)); // Reconfigure with the result of ApplyDynamic().
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "fiber_manager",
+ TFiberManagerConfig,
+ TFiberManagerDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NConcurrency
diff --git a/yt/yt/core/concurrency/coroutine.h b/yt/yt/core/concurrency/coroutine.h
index 7b395585e9..5a4e849908 100644
--- a/yt/yt/core/concurrency/coroutine.h
+++ b/yt/yt/core/concurrency/coroutine.h
@@ -5,6 +5,8 @@
#include <yt/yt/core/actions/callback.h>
+#include <library/cpp/yt/misc/concepts.h>
+
#include <util/system/context.h>
#include <optional>
diff --git a/yt/yt/core/concurrency/execution_stack.h b/yt/yt/core/concurrency/execution_stack.h
index 99038d3cd3..fe02810788 100644
--- a/yt/yt/core/concurrency/execution_stack.h
+++ b/yt/yt/core/concurrency/execution_stack.h
@@ -2,6 +2,10 @@
#include "public.h"
+#if defined(_win_)
+#include <windows.h>
+#endif
+
namespace NYT::NConcurrency {
////////////////////////////////////////////////////////////////////////////////
@@ -66,7 +70,6 @@ private:
friend TExecutionContext CreateExecutionContext(
TExecutionStack* stack,
void (*trampoline)(void*));
-
};
#else
diff --git a/yt/yt/core/concurrency/new_fair_share_thread_pool.cpp b/yt/yt/core/concurrency/new_fair_share_thread_pool.cpp
index a0e9fd4e8e..14541740eb 100644
--- a/yt/yt/core/concurrency/new_fair_share_thread_pool.cpp
+++ b/yt/yt/core/concurrency/new_fair_share_thread_pool.cpp
@@ -9,6 +9,7 @@
#include <yt/yt/core/actions/current_invoker.h>
#include <yt/yt/core/misc/finally.h>
+#include <yt/yt/core/misc/hazard_ptr.h>
#include <yt/yt/core/misc/heap.h>
#include <yt/yt/core/misc/ring_queue.h>
#include <yt/yt/core/misc/mpsc_stack.h>
@@ -714,7 +715,9 @@ public:
while (true) {
auto cookie = GetEventCount()->PrepareWait();
- auto hasAction = ThreadStates_[index].Action.BucketHolder;
+ auto& threadState = ThreadStates_[index];
+
+ auto hasAction = threadState.Action.BucketHolder;
int activeThreadDelta = hasAction ? -1 : 0;
auto callback = DoOnExecute(index, fetchNext);
@@ -738,6 +741,7 @@ public:
}
YT_VERIFY(fetchNext);
+ MaybeRunMaintenance(&threadState, GetCpuInstant(), /*flush*/ true);
Wait(cookie, isStopping);
}
}
@@ -806,6 +810,7 @@ private:
int LastActionsInQueue;
TDuration TimeFromStart;
TDuration TimeFromEnqueue;
+ TCpuInstant LastMaintenanceInstant = {};
};
static_assert(sizeof(TThreadState) >= CacheLineSize);
@@ -1189,6 +1194,8 @@ private:
ReportWaitTime(waitTime);
}
+ MaybeRunMaintenance(&threadState, action.StartedAt, /*flush*/ false);
+
CumulativeSchedulingTimeCounter_.Add(CpuDurationToDuration(GetCpuInstant() - cpuInstant));
if (!fetchNext) {
@@ -1240,6 +1247,17 @@ private:
WaitTimeObserver_(waitTime);
}
}
+
+ static void MaybeRunMaintenance(TThreadState* threadState, TCpuInstant now, bool flush)
+ {
+ YT_ASSERT(threadState);
+ constexpr i64 MaintenancePeriod = 1'000'000'000; // Added to a TCpuInstant, i.e. expressed in its units.
+ if (!flush && now <= threadState->LastMaintenanceInstant + MaintenancePeriod) {
+ return; // Not forced and the period since the last run has not elapsed.
+ }
+ ReclaimHazardPointers(false); // NB: |flush| only bypasses the period check; it is not forwarded here.
+ threadState->LastMaintenanceInstant = now;
+ }
};
DEFINE_REFCOUNTED_TYPE(TTwoLevelFairShareQueue)
diff --git a/yt/yt/core/concurrency/public.h b/yt/yt/core/concurrency/public.h
index b7634c0730..e25d455dfa 100644
--- a/yt/yt/core/concurrency/public.h
+++ b/yt/yt/core/concurrency/public.h
@@ -1,6 +1,9 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
+#include <library/cpp/yt/misc/enum.h>
namespace NYT::NConcurrency {
@@ -127,6 +130,8 @@ DECLARE_REFCOUNTED_STRUCT(ICallbackProvider)
class TPropagatingStorage;
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TFiberManagerConfig, TFiberManagerDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NConcurrency
diff --git a/yt/yt/core/logging/configure_log_manager.cpp b/yt/yt/core/logging/configure_log_manager.cpp
new file mode 100644
index 0000000000..4ace9c5d5b
--- /dev/null
+++ b/yt/yt/core/logging/configure_log_manager.cpp
@@ -0,0 +1,51 @@
+#include "log_manager.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NLogging {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TLogManagerConfigPtr>& staticParameter)
+{
+ staticParameter
+ .DefaultCtor([] { return TLogManagerConfig::CreateDefault(); })
+ .ResetOnLoad();
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TLogManagerDynamicConfigPtr>& dynamicParameter)
+{
+ dynamicParameter.DefaultNew(); // Dynamic "logging" section.
+}
+
+void ConfigureSingleton(const TLogManagerConfigPtr& staticConfig)
+{
+ if (TLogManager::Get()->IsConfiguredFromEnv()) {
+ return; // Configuration taken from the environment is left untouched.
+ }
+
+ TLogManager::Get()->Configure(staticConfig, /*sync*/ true);
+}
+
+void ReconfigureSingleton(
+ const TLogManagerConfigPtr& staticConfig,
+ const TLogManagerDynamicConfigPtr& dynamicConfig)
+{
+ if (TLogManager::Get()->IsConfiguredFromEnv()) {
+ return; // Configuration taken from the environment is left untouched.
+ }
+
+ TLogManager::Get()->Configure(staticConfig->ApplyDynamic(dynamicConfig), /*sync*/ false);
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "logging",
+ TLogManagerConfig,
+ TLogManagerDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NLogging
diff --git a/yt/yt/core/logging/public.h b/yt/yt/core/logging/public.h
index 2ede67840c..cd9b4d1568 100644
--- a/yt/yt/core/logging/public.h
+++ b/yt/yt/core/logging/public.h
@@ -1,9 +1,12 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
#include <library/cpp/yt/logging/public.h>
+#include <library/cpp/yt/misc/enum.h>
+
namespace NYT::NLogging {
////////////////////////////////////////////////////////////////////////////////
@@ -45,6 +48,8 @@ DECLARE_REFCOUNTED_STRUCT(IFileLogWriter)
DECLARE_REFCOUNTED_STRUCT(IStreamLogOutput)
DECLARE_REFCOUNTED_STRUCT(ILogCompressionCodec)
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TLogManagerConfig, TLogManagerDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NLogging
diff --git a/yt/yt/core/misc/configurable_singleton_decl-inl.h b/yt/yt/core/misc/configurable_singleton_decl-inl.h
new file mode 100644
index 0000000000..75342d0715
--- /dev/null
+++ b/yt/yt/core/misc/configurable_singleton_decl-inl.h
@@ -0,0 +1,33 @@
+#ifndef CONFIGURABLE_SINGLETON_DECL_INL_H_
+#error "Direct inclusion of this file is not allowed, include configurable_singleton_decl.h"
+// For the sake of sane code completion.
+#include "configurable_singleton_decl.h"
+#endif
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+template <class TConfig, bool Static> // Static: true for a static config type, false for a dynamic one.
+struct TSingletonConfigTag // Empty tag; only used as the parameter type of CheckSingletonConfigRegistered overloads.
+{ };
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+#undef YT_DECLARE_CONFIGURABLE_SINGLETON
+#undef YT_DECLARE_RECONFIGURABLE_SINGLETON
+
+#define YT_DECLARE_CONFIGURABLE_SINGLETON(configType) /* Declares the registration marker for a static singleton config. */ \
+ void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<configType, true>)
+
+#define YT_DECLARE_RECONFIGURABLE_SINGLETON(configType, dynamicConfigType) /* Declares markers for both the static and the dynamic config. */ \
+ void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<configType, true>); \
+ void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<dynamicConfigType, false>)
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/core/misc/configurable_singleton_decl.h b/yt/yt/core/misc/configurable_singleton_decl.h
new file mode 100644
index 0000000000..72d7b157c4
--- /dev/null
+++ b/yt/yt/core/misc/configurable_singleton_decl.h
@@ -0,0 +1,16 @@
+#pragma once
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+#define YT_DECLARE_CONFIGURABLE_SINGLETON(configType) // Placeholder; #undef'ed and redefined in configurable_singleton_decl-inl.h.
+#define YT_DECLARE_RECONFIGURABLE_SINGLETON(configType, dynamicConfigType) // Placeholder; #undef'ed and redefined in configurable_singleton_decl-inl.h.
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define CONFIGURABLE_SINGLETON_DECL_INL_H_
+#include "configurable_singleton_decl-inl.h"
+#undef CONFIGURABLE_SINGLETON_DECL_INL_H_
diff --git a/yt/yt/core/misc/configurable_singleton_def-inl.h b/yt/yt/core/misc/configurable_singleton_def-inl.h
new file mode 100644
index 0000000000..0f46b7c445
--- /dev/null
+++ b/yt/yt/core/misc/configurable_singleton_def-inl.h
@@ -0,0 +1,150 @@
+#ifndef CONFIGURABLE_SINGLETON_DEF_INL_H_
+#error "Direct inclusion of this file is not allowed, include configurable_singleton_def.h"
+// For the sake of sane code completion.
+#include "configurable_singleton_def.h"
+#endif
+
+#include <library/cpp/yt/misc/static_initializer.h>
+
+#include <yt/yt/core/misc/collection_helpers.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+template <bool Static>
+template <class TConfig>
+TIntrusivePtr<TConfig> TSingletonsConfigBase<Static>::TryGetSingletonConfig() // Returns the stored config pointer for TConfig; it may be null.
+{
+ CheckSingletonConfigRegistered(TSingletonConfigTag<TConfig, Static>()); // Compile-time check; the tag must match this flavor (static/dynamic), as in SetSingletonConfig.
+ return std::any_cast<TIntrusivePtr<TConfig>>(*GetOrCrash(TypeToConfig_, typeid(TConfig))); // Crashes if no slot is indexed under TConfig.
+}
+
+template <bool Static>
+template <class TConfig>
+TIntrusivePtr<TConfig> TSingletonsConfigBase<Static>::GetSingletonConfig()
+{
+ auto result = TryGetSingletonConfig<TConfig>();
+ YT_VERIFY(result); // Unlike TryGetSingletonConfig, a null config is treated as a fatal error.
+ return result;
+}
+
+template <bool Static>
+template <class TConfig>
+void TSingletonsConfigBase<Static>::SetSingletonConfig(TIntrusivePtr<TConfig> config) // Replaces the stored config pointer for TConfig.
+{
+ CheckSingletonConfigRegistered(TSingletonConfigTag<TConfig, Static>()); // Compile-time check that TConfig was declared for this flavor.
+ *GetOrCrash(TypeToConfig_, typeid(TConfig)) = std::move(config); // Writes through the std::any* slot; crashes if the slot is missing.
+}
+
+template <class TManagerConfig> // TManagerConfig: the aggregate config the field is registered in.
+using TRegisterSingletonField = std::function<void(NYTree::TYsonStructRegistrar<TManagerConfig> registrar)>;
+using TConfigureSingleton = std::function<void(const std::any& config)>; // Type-erased ConfigureSingleton call.
+using TReconfigureSingleton = std::function<void(const std::any& config, const std::any& dynamicConfig)>; // Type-erased ReconfigureSingleton call.
+
+struct TSingletonTraits // Per-singleton set of type-erased hooks.
+{
+ TRegisterSingletonField<TSingletonsConfig> RegisterField; // Registers the static config field.
+ TRegisterSingletonField<TSingletonsDynamicConfig> RegisterDynamicField; // Left empty by RegisterSingleton (non-reconfigurable singletons).
+ TConfigureSingleton Configure; // Invoked with the static config.
+ TReconfigureSingleton Reconfigure; // Left empty by RegisterSingleton (non-reconfigurable singletons).
+};
+
+struct TSingletonConfigHelpers // Builds the type-erased hooks stored in TSingletonTraits.
+{
+ static void RegisterSingleton( // Non-template overload: declared here, not defined in this header.
+ const std::string& singletonName,
+ TSingletonTraits singletonTraits);
+
+ template <class TSingletonConfig, class TManagerConfig>
+ static TRegisterSingletonField<TManagerConfig> MakeRegisterField(const std::string& singletonName)
+ {
+ return [=] (NYTree::TYsonStructRegistrar<TManagerConfig> registrar) { // singletonName is captured by copy.
+ SetupSingletonConfigParameter( // Unqualified call; the overload is chosen by the singleton config type.
+ registrar.template ParameterWithUniversalAccessor<TIntrusivePtr<TSingletonConfig>>(
+ // TODO(babenko): switch to std::string
+ TString(singletonName),
+ [=] (TManagerConfig* config) -> auto& { // Accessor returning a reference to the typed slot.
+ auto it = config->NameToConfig_.find(singletonName);
+ if (it == config->NameToConfig_.end()) { // First access on this config instance: create the slot.
+ it = config->NameToConfig_.emplace(singletonName, std::any(TIntrusivePtr<TSingletonConfig>())).first;
+ EmplaceOrCrash(config->TypeToConfig_, std::type_index(typeid(TSingletonConfig)), &it->second); // Index the same slot by type.
+ }
+ return *std::any_cast<TIntrusivePtr<TSingletonConfig>>(&it->second);
+ }));
+ };
+ }
+
+ template <class TSingletonConfig>
+ static TConfigureSingleton MakeConfigureSingleton()
+ {
+ return [] (const std::any& config) {
+ auto typedConfig = std::any_cast<TIntrusivePtr<TSingletonConfig>>(config); // Throws std::bad_any_cast on a type mismatch.
+ ConfigureSingleton(typedConfig); // Unqualified call; resolved by the config type.
+ };
+ }
+
+ template <class TSingletonConfig, class TDynamicSingletonConfig>
+ static TReconfigureSingleton MakeReconfigureSingleton()
+ {
+ return [] (const std::any& config, const std::any& dynamicConfig) {
+ auto typedConfig = std::any_cast<TIntrusivePtr<TSingletonConfig>>(config);
+ auto typedDynamicConfig = std::any_cast<TIntrusivePtr<TDynamicSingletonConfig>>(dynamicConfig);
+ ReconfigureSingleton(typedConfig, typedDynamicConfig); // Unqualified call; resolved by the config types.
+ };
+ }
+
+ template <class TSingletonConfig>
+ static void RegisterSingleton(const std::string& singletonName)
+ {
+ RegisterSingleton(
+ singletonName,
+ TSingletonTraits{ // RegisterDynamicField and Reconfigure stay empty.
+ .RegisterField = MakeRegisterField<TSingletonConfig, TSingletonsConfig>(singletonName),
+ .Configure = MakeConfigureSingleton<TSingletonConfig>(),
+ });
+ }
+
+ template <class TSingletonConfig, class TDynamicSingletonConfig>
+ static void RegisterReconfigurableSingleton(const std::string& singletonName)
+ {
+ RegisterSingleton(
+ singletonName,
+ TSingletonTraits{ // All four hooks are filled; both fields share the same name.
+ .RegisterField = MakeRegisterField<TSingletonConfig, TSingletonsConfig>(singletonName),
+ .RegisterDynamicField = MakeRegisterField<TDynamicSingletonConfig, TSingletonsDynamicConfig>(singletonName),
+ .Configure = MakeConfigureSingleton<TSingletonConfig>(),
+ .Reconfigure = MakeReconfigureSingleton<TSingletonConfig, TDynamicSingletonConfig>(),
+ });
+ }
+};
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+#undef YT_DEFINE_CONFIGURABLE_SINGLETON
+#undef YT_DEFINE_RECONFIGURABLE_SINGLETON
+
+#define YT_DEFINE_CONFIGURABLE_SINGLETON(singletonName, configType) /* Defines the marker and registers the singleton at static-init time. */ \
+ [[maybe_unused]] void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<configType, true>) \
+ { } \
+ \
+ YT_STATIC_INITIALIZER( \
+ ::NYT::NDetail::TSingletonConfigHelpers::RegisterSingleton<configType>(singletonName))
+
+#define YT_DEFINE_RECONFIGURABLE_SINGLETON(singletonName, configType, dynamicConfigType) /* Defines both markers and registers the singleton. */ \
+ [[maybe_unused]] void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<configType, true>) \
+ { } \
+ \
+ [[maybe_unused]] void CheckSingletonConfigRegistered(::NYT::NDetail::TSingletonConfigTag<dynamicConfigType, false>) \
+ { } \
+ \
+ YT_STATIC_INITIALIZER( \
+ ::NYT::NDetail::TSingletonConfigHelpers::RegisterReconfigurableSingleton<configType, dynamicConfigType>(singletonName))
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/core/misc/configurable_singleton_def.cpp b/yt/yt/core/misc/configurable_singleton_def.cpp
new file mode 100644
index 0000000000..acad95481d
--- /dev/null
+++ b/yt/yt/core/misc/configurable_singleton_def.cpp
@@ -0,0 +1,151 @@
+#include "configurable_singleton_def.h"
+
+#include <library/cpp/yt/memory/leaky_singleton.h>
+
+#include <library/cpp/yt/threading/spin_lock.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSingletonManagerImpl
+{
+public:
+ static TSingletonManagerImpl* Get()
+ {
+ return LeakySingleton<TSingletonManagerImpl>();
+ }
+
+ void Register(
+ const std::string& singletonName,
+ TSingletonTraits singletonTraits)
+ {
+ YT_VERIFY(!AllRegistered_.load()); // Registration is closed once Singletons() has been called.
+ EmplaceOrCrash(SingletonMap_, singletonName, std::move(singletonTraits));
+ }
+
+ void Configure(const TSingletonsConfigPtr& config)
+ {
+ auto guard = Guard(ConfigureLock_);
+
+ if (Configured_) {
+ THROW_ERROR_EXCEPTION("Singletons have already been configured");
+ }
+ Configured_ = true;
+
+ Config_ = config; // Kept for subsequent Reconfigure() calls.
+
+ for (const auto& [singletonName, singletonTraits] : Singletons()) {
+ const auto& singletonConfig = GetOrCrash(config->NameToConfig_, singletonName);
+ singletonTraits.Configure(singletonConfig);
+ }
+ }
+
+ void Reconfigure(const TSingletonsDynamicConfigPtr& dynamicConfig)
+ {
+ auto guard = Guard(ConfigureLock_);
+
+ if (!Configured_) {
+ THROW_ERROR_EXCEPTION("Singletons are not configured yet");
+ }
+
+ for (const auto& [singletonName, singletonTraits] : Singletons()) {
+ if (const auto& reconfigure = singletonTraits.Reconfigure) { // Unset for non-reconfigurable singletons.
+ const auto& staticSingletonConfig = GetOrCrash(Config_->NameToConfig_, singletonName);
+ const auto& dynamicSingletonConfig = GetOrCrash(dynamicConfig->NameToConfig_, singletonName);
+ reconfigure(staticSingletonConfig, dynamicSingletonConfig);
+ }
+ }
+ }
+
+ using TSingletonMap = THashMap<std::string, TSingletonTraits>;
+
+ const TSingletonMap& Singletons() const
+ {
+ AllRegistered_.store(true); // From now on Register() fails its verification.
+ return SingletonMap_;
+ }
+
+private:
+ DECLARE_LEAKY_SINGLETON_FRIEND();
+ TSingletonManagerImpl() = default;
+
+ mutable std::atomic<bool> AllRegistered_ = false;
+ TSingletonMap SingletonMap_;
+
+ NThreading::TSpinLock ConfigureLock_;
+ TSingletonsConfigPtr Config_;
+ bool Configured_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TSingletonConfigHelpers::RegisterSingleton(
+ const std::string& singletonName,
+ TSingletonTraits singletonTraits)
+{
+ // Thin forwarder to the process-wide manager.
+ auto* manager = TSingletonManagerImpl::Get();
+ manager->Register(singletonName, std::move(singletonTraits));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <bool Static>
+void TSingletonsConfigBase<Static>::RegisterSingletons(
+ auto&& registrar,
+ auto&& registerFieldSelector)
+{
+ for (const auto& [_, singletonTraits] : NDetail::TSingletonManagerImpl::Get()->Singletons()) {
+ if (const auto& registerField = registerFieldSelector(singletonTraits)) { // Skip singletons lacking the selected hook.
+ registerField(registrar);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template class TSingletonsConfigBase<false>; // Explicit instantiation; the base of TSingletonsDynamicConfig.
+template class TSingletonsConfigBase<true>; // Explicit instantiation; the base of TSingletonsConfig.
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TSingletonManager::Configure(const TSingletonsConfigPtr& config)
+{
+ NDetail::TSingletonManagerImpl::Get()->Configure(config); // Forwards to the implementation; throws if already configured.
+}
+
+void TSingletonManager::Reconfigure(const TSingletonsDynamicConfigPtr& dynamicConfig)
+{
+ NDetail::TSingletonManagerImpl::Get()->Reconfigure(dynamicConfig); // Forwards to the implementation; throws if not configured yet.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TSingletonsConfig::Register(TRegistrar registrar)
+{
+ // The static aggregate is assembled from the RegisterField hook of every singleton.
+ auto selectStaticField = [] (const NDetail::TSingletonTraits& traits) { return traits.RegisterField; };
+ RegisterSingletons(registrar, selectStaticField);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TSingletonsDynamicConfig::Register(TRegistrar registrar)
+{
+ // The dynamic aggregate only gets fields of singletons that provide RegisterDynamicField.
+ auto selectDynamicField = [] (const NDetail::TSingletonTraits& traits) { return traits.RegisterDynamicField; };
+ RegisterSingletons(registrar, selectDynamicField);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/core/misc/configurable_singleton_def.h b/yt/yt/core/misc/configurable_singleton_def.h
new file mode 100644
index 0000000000..684d50e314
--- /dev/null
+++ b/yt/yt/core/misc/configurable_singleton_def.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <yt/yt/core/ytree/yson_struct.h>
+
+#include <any>
+#include <typeindex>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+struct TSingletonConfigHelpers;
+class TSingletonManagerImpl;
+
+template <bool Static> // true for TSingletonsConfig, false for TSingletonsDynamicConfig.
+class TSingletonsConfigBase
+{
+public:
+ template <class TConfig>
+ TIntrusivePtr<TConfig> TryGetSingletonConfig(); // Returns the stored pointer; it may be null.
+
+ template <class TConfig>
+ TIntrusivePtr<TConfig> GetSingletonConfig(); // Same, but verifies that the pointer is non-null.
+
+ template <class TConfig>
+ void SetSingletonConfig(TIntrusivePtr<TConfig> config); // Overwrites the stored pointer.
+
+protected:
+ static void RegisterSingletons( // Applies the selected registration hook of every registered singleton.
+ auto&& registrar,
+ auto&& registerFieldSelector);
+
+private:
+ friend struct NYT::NDetail::TSingletonConfigHelpers;
+ friend class NYT::NDetail::TSingletonManagerImpl;
+
+ THashMap<std::string, std::any> NameToConfig_; // Singleton name -> type-erased config pointer.
+ THashMap<std::type_index, std::any*> TypeToConfig_; // Config type -> pointer to a value stored in NameToConfig_.
+};
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSingletonsConfig // Aggregate of the static configs of all registered singletons.
+ : public NDetail::TSingletonsConfigBase<true>
+ , public virtual NYTree::TYsonStruct
+{
+public:
+ REGISTER_YSON_STRUCT(TSingletonsConfig);
+
+ static void Register(TRegistrar registrar); // Registers one field per singleton via RegisterSingletons.
+};
+
+DEFINE_REFCOUNTED_TYPE(TSingletonsConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSingletonsDynamicConfig // Aggregate of the dynamic configs of reconfigurable singletons.
+ : public NDetail::TSingletonsConfigBase<false>
+ , public virtual NYTree::TYsonStruct
+{
+public:
+ REGISTER_YSON_STRUCT(TSingletonsDynamicConfig);
+
+ static void Register(TRegistrar registrar); // Registers one field per singleton that has a dynamic hook.
+};
+
+DEFINE_REFCOUNTED_TYPE(TSingletonsDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+#define YT_DEFINE_CONFIGURABLE_SINGLETON(singletonName, configType) // Placeholder; #undef'ed and redefined in configurable_singleton_def-inl.h.
+#define YT_DEFINE_RECONFIGURABLE_SINGLETON(singletonName, configType, dynamicConfigType) // Placeholder; #undef'ed and redefined in configurable_singleton_def-inl.h.
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSingletonManager // Public entry point; both calls forward to NDetail::TSingletonManagerImpl.
+{
+public:
+ static void Configure(const TSingletonsConfigPtr& config); // Throws if singletons have already been configured.
+ static void Reconfigure(const TSingletonsDynamicConfigPtr& dynamicConfig); // Throws if Configure() has not been called yet.
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define CONFIGURABLE_SINGLETON_DEF_INL_H_
+#include "configurable_singleton_def-inl.h"
+#undef CONFIGURABLE_SINGLETON_DEF_INL_H_
diff --git a/yt/yt/core/misc/public.h b/yt/yt/core/misc/public.h
index 9cff9ba2cd..2ce8dd5a2d 100644
--- a/yt/yt/core/misc/public.h
+++ b/yt/yt/core/misc/public.h
@@ -3,8 +3,6 @@
#include "common.h"
#include "error_code.h"
-#include <library/cpp/yt/misc/concepts.h>
-
// Google Protobuf forward declarations.
namespace google::protobuf {
@@ -102,6 +100,9 @@ DECLARE_REFCOUNTED_CLASS(TAsyncExpiringCacheConfig)
DECLARE_REFCOUNTED_CLASS(TLogDigestConfig)
DECLARE_REFCOUNTED_CLASS(THistogramDigestConfig)
+DECLARE_REFCOUNTED_CLASS(TSingletonsConfig)
+DECLARE_REFCOUNTED_CLASS(TSingletonsDynamicConfig)
+
class TSignalRegistry;
class TBloomFilterBuilder;
diff --git a/yt/yt/core/misc/unittests/configurable_singleton_ut.cpp b/yt/yt/core/misc/unittests/configurable_singleton_ut.cpp
new file mode 100644
index 0000000000..64fdbfd6f5
--- /dev/null
+++ b/yt/yt/core/misc/unittests/configurable_singleton_ut.cpp
@@ -0,0 +1,234 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT {
+namespace {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+DECLARE_REFCOUNTED_STRUCT(TRequiredSingletonConfig);
+DECLARE_REFCOUNTED_STRUCT(TOptionalSingletonConfig);
+DECLARE_REFCOUNTED_STRUCT(TDefaultNewSingletonConfig);
+DECLARE_REFCOUNTED_STRUCT(TReconfigurableSingletonConfig);
+DECLARE_REFCOUNTED_STRUCT(TReconfigurableSingletonDynamicConfig);
+
+YT_DECLARE_CONFIGURABLE_SINGLETON(TRequiredSingletonConfig);
+YT_DECLARE_CONFIGURABLE_SINGLETON(TOptionalSingletonConfig);
+YT_DECLARE_CONFIGURABLE_SINGLETON(TDefaultNewSingletonConfig);
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TReconfigurableSingletonConfig, TReconfigurableSingletonDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TRequiredSingletonConfig
+ : public TYsonStruct
+{
+ int Speed; // Registered without a default.
+
+ REGISTER_YSON_STRUCT(TRequiredSingletonConfig);
+
+ static void Register(TRegistrar registrar)
+ {
+ registrar.Parameter("speed", &TThis::Speed);
+ }
+};
+
+DEFINE_REFCOUNTED_TYPE(TRequiredSingletonConfig)
+
+int ConfiguredSpeed = -1;
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TRequiredSingletonConfigPtr>& /*parameter*/)
+{ } // Intentionally empty: neither a default nor Optional() is applied to the "required" section.
+
+void ConfigureSingleton(const TRequiredSingletonConfigPtr& config)
+{
+ ConfiguredSpeed = config->Speed; // Recorded so that the test can observe the call.
+}
+
+YT_DEFINE_CONFIGURABLE_SINGLETON("required", TRequiredSingletonConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TOptionalSingletonConfig
+ : public TYsonStruct
+{
+ int Depth; // Registered without a default.
+
+ REGISTER_YSON_STRUCT(TOptionalSingletonConfig);
+
+ static void Register(TRegistrar registrar)
+ {
+ registrar.Parameter("depth", &TThis::Depth);
+ }
+};
+
+DEFINE_REFCOUNTED_TYPE(TOptionalSingletonConfig)
+
+int ConfiguredDepth = -1;
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TOptionalSingletonConfigPtr>& parameter)
+{
+ parameter.Optional(); // The "optional" section may be omitted; the config pointer is then null.
+}
+
+void ConfigureSingleton(const TOptionalSingletonConfigPtr& config)
+{
+ if (config) { // Null when the optional section was not provided.
+ ConfiguredDepth = config->Depth;
+ }
+}
+
+YT_DEFINE_CONFIGURABLE_SINGLETON("optional", TOptionalSingletonConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TDefaultNewSingletonConfig
+ : public TYsonStruct
+{
+ int Width; // Defaults to 456.
+
+ REGISTER_YSON_STRUCT(TDefaultNewSingletonConfig);
+
+ static void Register(TRegistrar registrar)
+ {
+ registrar.Parameter("width", &TThis::Width)
+ .Default(456);
+ }
+};
+
+DEFINE_REFCOUNTED_TYPE(TDefaultNewSingletonConfig)
+
+int ConfiguredWidth = -1;
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TDefaultNewSingletonConfigPtr>& parameter)
+{
+ parameter.DefaultNew(); // The test below expects Width == 456 when the "default_new" section is omitted.
+}
+
+void ConfigureSingleton(const TDefaultNewSingletonConfigPtr& config)
+{
+ ConfiguredWidth = config->Width; // Recorded so that the test can observe the call.
+}
+
+YT_DEFINE_CONFIGURABLE_SINGLETON("default_new", TDefaultNewSingletonConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TReconfigurableSingletonConfig
+ : public TYsonStruct
+{
+ int Cost; // Defaults to 777.
+
+ REGISTER_YSON_STRUCT(TReconfigurableSingletonConfig);
+
+ static void Register(TRegistrar registrar)
+ {
+ registrar.Parameter("cost", &TThis::Cost)
+ .Default(777);
+ }
+};
+
+DEFINE_REFCOUNTED_TYPE(TReconfigurableSingletonConfig)
+
+struct TReconfigurableSingletonDynamicConfig
+ : public TYsonStruct
+{
+ std::optional<int> Cost; // When empty, ReconfigureSingleton falls back to the static Cost.
+
+ REGISTER_YSON_STRUCT(TReconfigurableSingletonDynamicConfig);
+
+ static void Register(TRegistrar registrar)
+ {
+ registrar.Parameter("cost", &TThis::Cost)
+ .Default();
+ }
+};
+
+DEFINE_REFCOUNTED_TYPE(TReconfigurableSingletonDynamicConfig)
+
+int ConfiguredCost = -1;
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TReconfigurableSingletonConfigPtr>& parameter)
+{
+ parameter.DefaultNew(); // Static "reconfigurable" section; the test below omits it and expects Cost == 777.
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TReconfigurableSingletonDynamicConfigPtr>& parameter)
+{
+ parameter.DefaultNew(); // Dynamic "reconfigurable" section.
+}
+
+void ConfigureSingleton(const TReconfigurableSingletonConfigPtr& config)
+{
+ ConfiguredCost = config->Cost; // Initial value comes from the static config only.
+}
+
+void ReconfigureSingleton(
+ const TReconfigurableSingletonConfigPtr& config,
+ const TReconfigurableSingletonDynamicConfigPtr& dynamicConfig)
+{
+ ConfiguredCost = dynamicConfig->Cost.value_or(config->Cost); // The dynamic value wins; otherwise fall back to the static one.
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "reconfigurable",
+ TReconfigurableSingletonConfig,
+ TReconfigurableSingletonDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TConfigurableSingletonTest, Run) // End-to-end scenario: reconfigure-before-configure, configure, double configure, two reconfigures.
+{
+ auto config = ConvertTo<TSingletonsConfigPtr>(NYson::TYsonString(TString(R"""({
+ required = {
+ speed = 123;
+ };
+ })"""))); // Only the "required" section is given; the other sections are omitted.
+ auto dynamicConfig1 = ConvertTo<TSingletonsDynamicConfigPtr>(NYson::TYsonString(TString(R"""({
+ reconfigurable = {
+ cost = 888;
+ };
+ })""")));
+ auto dynamicConfig2 = ConvertTo<TSingletonsDynamicConfigPtr>(NYson::TYsonString(TString(R"""({
+ reconfigurable = {
+ cost = 999;
+ };
+ })""")));
+
+ EXPECT_THROW_WITH_SUBSTRING(TSingletonManager::Reconfigure(dynamicConfig1), "Singletons are not configured yet");
+
+ EXPECT_EQ(ConfiguredSpeed, -1); // Nothing has been configured yet: all globals keep their -1 sentinel.
+ EXPECT_EQ(ConfiguredDepth, -1);
+ EXPECT_EQ(ConfiguredWidth, -1);
+ EXPECT_EQ(ConfiguredCost, -1);
+
+ TSingletonManager::Configure(config);
+
+ EXPECT_EQ(ConfiguredSpeed, 123); // Taken from the provided section.
+ EXPECT_EQ(ConfiguredDepth, -1); // Optional section omitted: the config is null and the global is untouched.
+ EXPECT_EQ(ConfiguredWidth, 456); // Parameter default of the omitted "default_new" section.
+ EXPECT_EQ(ConfiguredCost, 777); // Parameter default of the omitted "reconfigurable" section.
+
+ EXPECT_THROW_WITH_SUBSTRING(TSingletonManager::Configure(config), "Singletons have already been configured");
+
+ TSingletonManager::Reconfigure(dynamicConfig1);
+
+ EXPECT_EQ(ConfiguredSpeed, 123); // Non-reconfigurable singletons are unaffected.
+ EXPECT_EQ(ConfiguredDepth, -1);
+ EXPECT_EQ(ConfiguredWidth, 456);
+ EXPECT_EQ(ConfiguredCost, 888); // Dynamic override applied.
+
+ TSingletonManager::Reconfigure(dynamicConfig2);
+
+ EXPECT_EQ(ConfiguredSpeed, 123);
+ EXPECT_EQ(ConfiguredDepth, -1);
+ EXPECT_EQ(ConfiguredWidth, 456);
+ EXPECT_EQ(ConfiguredCost, 999); // Reconfiguration may be repeated.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/yt/yt/core/misc/unittests/ya.make b/yt/yt/core/misc/unittests/ya.make
index a422838270..79d62c52c2 100644
--- a/yt/yt/core/misc/unittests/ya.make
+++ b/yt/yt/core/misc/unittests/ya.make
@@ -72,6 +72,7 @@ SRCS(
yverify_ut.cpp
zerocopy_output_writer_ut.cpp
hedging_manager_ut.cpp
+ configurable_singleton_ut.cpp
proto/ref_counted_tracker_ut.proto
)
diff --git a/yt/yt/core/net/address.cpp b/yt/yt/core/net/address.cpp
index ba490e7739..d048167af3 100644
--- a/yt/yt/core/net/address.cpp
+++ b/yt/yt/core/net/address.cpp
@@ -15,6 +15,7 @@
#include <yt/yt/core/misc/async_expiring_cache.h>
#include <yt/yt/core/misc/fs.h>
+#include <yt/yt/core/misc/configurable_singleton_def.h>
#include <yt/yt/core/profiling/timing.h>
diff --git a/yt/yt/core/net/configure_address_resolver.cpp b/yt/yt/core/net/configure_address_resolver.cpp
new file mode 100644
index 0000000000..16ae5220fd
--- /dev/null
+++ b/yt/yt/core/net/configure_address_resolver.cpp
@@ -0,0 +1,28 @@
+#include "address.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NNet {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up the singleton config parameter holding a TAddressResolverConfigPtr
+//! by calling DefaultNew() on it.
+// NOTE(review): presumably this gives the section a default-constructed value
+// when it is omitted -- confirm against TYsonStructParameter.
+void SetupSingletonConfigParameter(TYsonStructParameter<TAddressResolverConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Passes #config to the TAddressResolver instance returned by TAddressResolver::Get().
+void ConfigureSingleton(const TAddressResolverConfigPtr& config)
+{
 TAddressResolver::Get()->Configure(config);
+}
+
+// Defines the configurable singleton for TAddressResolverConfig under the
+// name "address_resolver".
+YT_DEFINE_CONFIGURABLE_SINGLETON(
 "address_resolver",
 TAddressResolverConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NNet
diff --git a/yt/yt/core/net/public.h b/yt/yt/core/net/public.h
index fde7eea47a..ea4891db83 100644
--- a/yt/yt/core/net/public.h
+++ b/yt/yt/core/net/public.h
@@ -1,9 +1,13 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+#include <yt/yt/core/misc/error_code.h>
#include <library/cpp/yt/memory/intrusive_ptr.h>
+#include <library/cpp/yt/misc/guid.h>
+
namespace NYT::NNet {
////////////////////////////////////////////////////////////////////////////////
@@ -32,6 +36,8 @@ YT_DEFINE_ERROR_ENUM(
((ResolveTimedOut) (1501))
);
+YT_DECLARE_CONFIGURABLE_SINGLETON(TAddressResolverConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NNet
diff --git a/yt/yt/core/rpc/configure_dispatcher.cpp b/yt/yt/core/rpc/configure_dispatcher.cpp
new file mode 100644
index 0000000000..ec95cd855d
--- /dev/null
+++ b/yt/yt/core/rpc/configure_dispatcher.cpp
@@ -0,0 +1,41 @@
+#include "dispatcher.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NRpc {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up the static dispatcher config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TDispatcherConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Sets up the dynamic dispatcher config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TDispatcherDynamicConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Passes the static #config to the TDispatcher instance returned by TDispatcher::Get().
+void ConfigureSingleton(const TDispatcherConfigPtr& config)
+{
 TDispatcher::Get()->Configure(config);
+}
+
+//! Reconfigures the dispatcher: calls config->ApplyDynamic(dynamicConfig) and
+//! passes the result to TDispatcher::Configure.
+void ReconfigureSingleton(
 const TDispatcherConfigPtr& config,
 const TDispatcherDynamicConfigPtr& dynamicConfig)
+{
 TDispatcher::Get()->Configure(config->ApplyDynamic(dynamicConfig));
+}
+
+// Defines the reconfigurable singleton for the static/dynamic dispatcher
+// config pair under the name "rpc_dispatcher".
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
 "rpc_dispatcher",
 TDispatcherConfig,
 TDispatcherDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NRpc
diff --git a/yt/yt/core/rpc/dispatcher.cpp b/yt/yt/core/rpc/dispatcher.cpp
index 64edfdc3ae..0ede4496f9 100644
--- a/yt/yt/core/rpc/dispatcher.cpp
+++ b/yt/yt/core/rpc/dispatcher.cpp
@@ -161,7 +161,6 @@ void TDispatcher::SetServiceDiscovery(IServiceDiscoveryPtr serviceDiscovery)
Impl_->SetServiceDiscovery(std::move(serviceDiscovery));
}
-
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NRpc
diff --git a/yt/yt/core/rpc/grpc/configure_dispatcher.cpp b/yt/yt/core/rpc/grpc/configure_dispatcher.cpp
new file mode 100644
index 0000000000..e12e1f1739
--- /dev/null
+++ b/yt/yt/core/rpc/grpc/configure_dispatcher.cpp
@@ -0,0 +1,28 @@
+#include "dispatcher.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NRpc::NGrpc {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up the GRPC dispatcher config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TDispatcherConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Passes #config to the GRPC TDispatcher instance returned by TDispatcher::Get().
+void ConfigureSingleton(const TDispatcherConfigPtr& config)
+{
 TDispatcher::Get()->Configure(config);
+}
+
+// Defines the configurable singleton for the GRPC TDispatcherConfig under the
+// name "grpc_dispatcher". No dynamic config type is given here.
+YT_DEFINE_CONFIGURABLE_SINGLETON(
 "grpc_dispatcher",
 TDispatcherConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NRpc::NGrpc
diff --git a/yt/yt/core/rpc/grpc/public.h b/yt/yt/core/rpc/grpc/public.h
index 7cb846bf33..737faf3f4c 100644
--- a/yt/yt/core/rpc/grpc/public.h
+++ b/yt/yt/core/rpc/grpc/public.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
#include <yt/yt/core/logging/log.h>
namespace NYT::NRpc::NGrpc {
@@ -44,6 +46,8 @@ const THashSet<TStringBuf>& GetNativeMetadataKeys();
constexpr int GenericErrorStatusCode = 100;
+YT_DECLARE_CONFIGURABLE_SINGLETON(TDispatcherConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NRpc::NGrpc
diff --git a/yt/yt/core/rpc/grpc/ya.make b/yt/yt/core/rpc/grpc/ya.make
index 4622b67b93..5fc1d908f7 100644
--- a/yt/yt/core/rpc/grpc/ya.make
+++ b/yt/yt/core/rpc/grpc/ya.make
@@ -6,6 +6,7 @@ PROTO_NAMESPACE(yt)
SRCS(
config.cpp
+ GLOBAL configure_dispatcher.cpp
public.cpp
dispatcher.cpp
server.cpp
diff --git a/yt/yt/core/rpc/http/server.cpp b/yt/yt/core/rpc/http/server.cpp
index e0caa56bb7..783b82cd51 100644
--- a/yt/yt/core/rpc/http/server.cpp
+++ b/yt/yt/core/rpc/http/server.cpp
@@ -371,6 +371,11 @@ private:
rpcHeader->set_request_codec(ToProto(NCompression::ECodec::None));
rpcHeader->set_response_codec(ToProto(NCompression::ECodec::None));
+ ToProto(
+ rpcHeader->MutableExtension(NRpc::NProto::TRequestHeader::tracing_ext),
+ NTracing::TryGetCurrentTraceContext(),
+ /*sendBaggage*/ false);
+
return {};
}
};
diff --git a/yt/yt/core/rpc/public.h b/yt/yt/core/rpc/public.h
index 42933a8774..ea0a147594 100644
--- a/yt/yt/core/rpc/public.h
+++ b/yt/yt/core/rpc/public.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
#include <yt/yt/core/actions/callback.h>
#include <yt/yt/core/concurrency/public.h>
@@ -197,6 +199,8 @@ DEFINE_ENUM(EMessageFormat,
((Yson) (2))
);
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TDispatcherConfig, TDispatcherDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NRpc
diff --git a/yt/yt/core/service_discovery/yp/configure_service_discovery.cpp b/yt/yt/core/service_discovery/yp/configure_service_discovery.cpp
new file mode 100644
index 0000000000..3321226a4e
--- /dev/null
+++ b/yt/yt/core/service_discovery/yp/configure_service_discovery.cpp
@@ -0,0 +1,30 @@
+#include "service_discovery.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+#include <yt/yt/core/rpc/dispatcher.h>
+
+namespace NYT::NServiceDiscovery::NYP {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up the YP service discovery config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TServiceDiscoveryConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Builds a service discovery from #config via CreateServiceDiscovery and
+//! installs it into the RPC dispatcher with SetServiceDiscovery.
+void ConfigureSingleton(const TServiceDiscoveryConfigPtr& config)
+{
 NRpc::TDispatcher::Get()->SetServiceDiscovery(CreateServiceDiscovery(config));
+}
+
+// Defines the configurable singleton for TServiceDiscoveryConfig under the
+// name "yp_service_discovery".
+YT_DEFINE_CONFIGURABLE_SINGLETON(
 "yp_service_discovery",
 TServiceDiscoveryConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NServiceDiscovery::NYP
diff --git a/yt/yt/core/service_discovery/yp/public.h b/yt/yt/core/service_discovery/yp/public.h
index 53d4de18d8..c90b9dc5a7 100644
--- a/yt/yt/core/service_discovery/yp/public.h
+++ b/yt/yt/core/service_discovery/yp/public.h
@@ -1,6 +1,7 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
namespace NYT::NServiceDiscovery::NYP {
@@ -8,6 +9,8 @@ namespace NYT::NServiceDiscovery::NYP {
DECLARE_REFCOUNTED_CLASS(TServiceDiscoveryConfig)
+YT_DECLARE_CONFIGURABLE_SINGLETON(TServiceDiscoveryConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NServiceDiscovery::NYP
diff --git a/yt/yt/core/service_discovery/yp/ya.make b/yt/yt/core/service_discovery/yp/ya.make
index cc37fa9639..e0efcc5a88 100644
--- a/yt/yt/core/service_discovery/yp/ya.make
+++ b/yt/yt/core/service_discovery/yp/ya.make
@@ -9,6 +9,7 @@ PEERDIR(
SRCS(
config.cpp
+ GLOBAL configure_service_discovery.cpp
)
IF (NOT OPENSOURCE)
diff --git a/yt/yt/core/ya.make b/yt/yt/core/ya.make
index 2368a67992..9794fce0a2 100644
--- a/yt/yt/core/ya.make
+++ b/yt/yt/core/ya.make
@@ -27,6 +27,7 @@ SRCS(
bus/tcp/dispatcher.cpp
bus/tcp/dispatcher_impl.cpp
bus/tcp/config.cpp
+ GLOBAL bus/tcp/configure_dispatcher.cpp
bus/tcp/packet.cpp
bus/tcp/client.cpp
bus/tcp/server.cpp
@@ -53,6 +54,7 @@ SRCS(
concurrency/async_stream_pipe.cpp
concurrency/async_stream.cpp
concurrency/config.cpp
+ GLOBAL concurrency/configure_fiber_manager.cpp
concurrency/coroutine.cpp
concurrency/delayed_executor.cpp
concurrency/execution_stack.cpp
@@ -97,6 +99,7 @@ SRCS(
logging/compression.cpp
logging/config.cpp
+ GLOBAL logging/configure_log_manager.cpp
logging/formatter.cpp
logging/fluent_log.cpp
GLOBAL logging/log.cpp
@@ -164,10 +167,12 @@ SRCS(
misc/cache_config.cpp
misc/utf8_decoder.cpp
misc/zerocopy_output_writer.cpp
+ misc/configurable_singleton_def.cpp
net/address.cpp
net/connection.cpp
net/config.cpp
+ GLOBAL net/configure_address_resolver.cpp
net/dialer.cpp
net/helpers.cpp
net/listener.cpp
@@ -195,6 +200,7 @@ SRCS(
rpc/channel_detail.cpp
rpc/client.cpp
rpc/config.cpp
+ GLOBAL rpc/configure_dispatcher.cpp
rpc/dispatcher.cpp
rpc/dynamic_channel_pool.cpp
rpc/hedging_channel.cpp
@@ -244,6 +250,7 @@ SRCS(
yson/async_writer.cpp
yson/attribute_consumer.cpp
yson/config.cpp
+ GLOBAL yson/configure_protobuf_interop.cpp
yson/consumer.cpp
yson/forwarding_consumer.cpp
yson/lexer.cpp
diff --git a/yt/yt/core/yson/configure_protobuf_interop.cpp b/yt/yt/core/yson/configure_protobuf_interop.cpp
new file mode 100644
index 0000000000..ca621c0664
--- /dev/null
+++ b/yt/yt/core/yson/configure_protobuf_interop.cpp
@@ -0,0 +1,41 @@
+#include "protobuf_interop.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NYson {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up the static protobuf interop config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TProtobufInteropConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Sets up the dynamic protobuf interop config parameter by calling DefaultNew() on it.
+void SetupSingletonConfigParameter(TYsonStructParameter<TProtobufInteropDynamicConfigPtr>& parameter)
+{
 parameter.DefaultNew();
+}
+
+//! Installs #config via SetProtobufInteropConfig.
+void ConfigureSingleton(const TProtobufInteropConfigPtr& config)
+{
 SetProtobufInteropConfig(config);
+}
+
+//! Reconfigures protobuf interop: calls config->ApplyDynamic(dynamicConfig)
+//! and installs the result through ConfigureSingleton.
+void ReconfigureSingleton(
 const TProtobufInteropConfigPtr& config,
 const TProtobufInteropDynamicConfigPtr& dynamicConfig)
+{
 ConfigureSingleton(config->ApplyDynamic(dynamicConfig));
+}
+
+// Defines the reconfigurable singleton for the static/dynamic protobuf interop
+// config pair under the name "protobuf_interop".
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
 "protobuf_interop",
 TProtobufInteropConfig,
 TProtobufInteropDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
diff --git a/yt/yt/core/yson/public.h b/yt/yt/core/yson/public.h
index d0932dea56..dae0c0b298 100644
--- a/yt/yt/core/yson/public.h
+++ b/yt/yt/core/yson/public.h
@@ -1,6 +1,7 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
#include <library/cpp/yt/yson/public.h>
#include <library/cpp/yt/yson_string/public.h>
@@ -98,6 +99,8 @@ class TProtobufMessageType;
//! An opaque reflected counterpart of ::google::protobuf::EnumDescriptor.
class TProtobufEnumType;
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TProtobufInteropConfig, TProtobufInteropDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NYson
diff --git a/yt/yt/core/yson/string.h b/yt/yt/core/yson/string.h
index 45938e3f7d..bb0992d9c7 100644
--- a/yt/yt/core/yson/string.h
+++ b/yt/yt/core/yson/string.h
@@ -2,6 +2,8 @@
#include "public.h"
+#include <yt/yt/core/misc/serialize.h>
+
#include <library/cpp/yt/yson_string/string.h>
namespace NYT::NYson {
diff --git a/yt/yt/core/yson/token.h b/yt/yt/core/yson/token.h
index 66c3e91075..c1de561903 100644
--- a/yt/yt/core/yson/token.h
+++ b/yt/yt/core/yson/token.h
@@ -4,6 +4,8 @@
#include <yt/yt/core/misc/property.h>
+#include <library/cpp/yt/string/string_builder.h>
+
namespace NYT::NYson {
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/core/yson/writer.cpp b/yt/yt/core/yson/writer.cpp
index d31abf0b46..32e3f1636c 100644
--- a/yt/yt/core/yson/writer.cpp
+++ b/yt/yt/core/yson/writer.cpp
@@ -132,30 +132,6 @@ void WriteUtf8String(const char* str, size_t len, IOutputStream& output)
}
}
-size_t FloatToStringWithNanInf(double value, char* buf, size_t size)
-{
- if (std::isfinite(value)) {
- return FloatToString(value, buf, size);
- }
-
- static const TStringBuf nanLiteral = "%nan";
- static const TStringBuf infLiteral = "%inf";
- static const TStringBuf negativeInfLiteral = "%-inf";
-
- TStringBuf str;
- if (std::isnan(value)) {
- str = nanLiteral;
- } else if (std::isinf(value) && value > 0) {
- str = infLiteral;
- } else {
- str = negativeInfLiteral;
- }
- YT_VERIFY(str.size() + 1 <= size);
- ::memcpy(buf, str.data(), str.size() + 1);
- return str.size();
-}
-
-
} // namespace
////////////////////////////////////////////////////////////////////////////////
@@ -277,7 +253,7 @@ void TYsonWriter::OnDoubleScalar(double value)
Stream_->Write(&value, sizeof(double));
} else {
char buf[256];
- auto str = TStringBuf(buf, FloatToStringWithNanInf(value, buf, sizeof(buf)));
+ auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf)));
Stream_->Write(str);
if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) {
Stream_->Write(".");
diff --git a/yt/yt/core/ytree/unittests/text_yson_convert_ut.cpp b/yt/yt/core/ytree/unittests/text_yson_convert_ut.cpp
new file mode 100644
index 0000000000..75913bed0b
--- /dev/null
+++ b/yt/yt/core/ytree/unittests/text_yson_convert_ut.cpp
@@ -0,0 +1,273 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/ytree/convert.h>
+
+#include <library/cpp/yt/misc/source_location.h>
+
+#include <library/cpp/yt/yson_string/convert.h>
+
+namespace NYT::NYTree {
+namespace {
+
+using namespace NYson;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Checks that ConvertToTextYsonString(value) produces the same text as
+//! ConvertToYsonString(value, EYsonFormat::Text).
+//! On mismatch the failure message is suffixed with #loc
+//! (presumably the call site -- confirm how YT_CURRENT_SOURCE_LOCATION expands).
+template <class T>
+void CheckEqualConversionToTextYson(const T& value, const TSourceLocation& loc = YT_CURRENT_SOURCE_LOCATION)
+{
 EXPECT_EQ(ConvertToTextYsonString(value).AsStringBuf(), ConvertToYsonString(value, EYsonFormat::Text).AsStringBuf())
 << NYT::Format("At %v", loc);
+}
+
+//! Serializes #value (of type U) with ConvertToTextYsonString and checks that
+//! parsing the result as T gives the same value through
+//! ConvertFromTextYsonString<T> and through ConvertTo<T>.
+//! T must be given explicitly; U is deduced from the argument.
+template <class T, class U>
+void CheckEqualConversionToFromTextYson(const U& value, const TSourceLocation& loc = YT_CURRENT_SOURCE_LOCATION)
+{
 auto yson = ConvertToTextYsonString(value);
 EXPECT_EQ(ConvertFromTextYsonString<T>(yson), ConvertTo<T>(yson))
 << NYT::Format("At %v", loc);
+}
+
+//! Wraps the raw text #value into a TYsonString and checks that parsing it as
+//! T gives the same value through ConvertFromTextYsonString<T> and through
+//! ConvertTo<T>. Unlike the To/From helper, the input text is taken verbatim
+//! rather than produced by ConvertToTextYsonString.
+template <class T>
+void CheckEqualConversionFromTextYson(TStringBuf value, const TSourceLocation& loc = YT_CURRENT_SOURCE_LOCATION)
+{
 NYson::TYsonString yson(value);
+ // The two sides must go through different conversion paths; comparing a
+ // conversion with itself would make the check vacuous.
 EXPECT_EQ(ConvertFromTextYsonString<T>(yson), ConvertTo<T>(yson))
 << NYT::Format("At %v", loc);
+}
+
+// Text serialization of small positive, zero and negative values for signed
+// types, and of positive and zero values for unsigned types, across 8/32/64-bit
+// widths.
+TEST(TTextYsonConvertTest, ConvertToTextIntegrals)
+{
 CheckEqualConversionToTextYson<i8>(+14);
 CheckEqualConversionToTextYson<i8>(0);
 CheckEqualConversionToTextYson<i8>(-15);
 CheckEqualConversionToTextYson<i32>(+100);
 CheckEqualConversionToTextYson<i32>(0);
 CheckEqualConversionToTextYson<i32>(-123);
 CheckEqualConversionToTextYson<i64>(+100);
 CheckEqualConversionToTextYson<i64>(0);
 CheckEqualConversionToTextYson<i64>(-123);
+
 CheckEqualConversionToTextYson<ui8>(+100);
 CheckEqualConversionToTextYson<ui8>(0);
 CheckEqualConversionToTextYson<ui32>(+100);
 CheckEqualConversionToTextYson<ui32>(0);
 CheckEqualConversionToTextYson<ui64>(+100);
 CheckEqualConversionToTextYson<ui64>(0);
+}
+
+// Text serialization at the extreme values of the 64-bit signed and unsigned
+// integer types.
+TEST(TTextYsonConvertTest, ConvertToTextIntegralsLimits)
+{
 CheckEqualConversionToTextYson<i64>(std::numeric_limits<i64>::max());
 CheckEqualConversionToTextYson<i64>(std::numeric_limits<i64>::min());
+
 CheckEqualConversionToTextYson<ui64>(std::numeric_limits<ui64>::max());
 CheckEqualConversionToTextYson<ui64>(std::numeric_limits<ui64>::min());
+}
+
+// Text serialization of ordinary finite float and double values, including
+// both signed zeros.
+TEST(TTextYsonConvertTest, ConvertToTextFloats)
+{
 CheckEqualConversionToTextYson<float>(0.0);
 CheckEqualConversionToTextYson<float>(-0.0);
 CheckEqualConversionToTextYson<float>(-7.7777);
 CheckEqualConversionToTextYson<float>(+9.243);
+
 CheckEqualConversionToTextYson<double>(0.0);
 CheckEqualConversionToTextYson<double>(-0.0);
 CheckEqualConversionToTextYson<double>(-7.7777);
 CheckEqualConversionToTextYson<double>(+9.243);
+}
+
+// Text serialization of special double values: numeric_limits min()/max(),
+// both infinities and a quiet NaN. NaN is fine here because the helper compares
+// the serialized strings, not the floating-point values.
+TEST(TTextYsonConvertTest, ConvertToTextFloatsSpecialValues)
+{
 CheckEqualConversionToTextYson<double>(std::numeric_limits<double>::min());
 CheckEqualConversionToTextYson<double>(std::numeric_limits<double>::max());
 CheckEqualConversionToTextYson<double>(std::numeric_limits<double>::infinity());
 CheckEqualConversionToTextYson<double>(-std::numeric_limits<double>::infinity());
 CheckEqualConversionToTextYson<double>(std::numeric_limits<double>::quiet_NaN());
+}
+
+// Text serialization of non-numeric primitives: bool, TInstant, TDuration,
+// std::string and TGuid.
+TEST(TTextYsonConvertTest, ConvertToTextOtherPrimitiveTypes)
+{
 CheckEqualConversionToTextYson<bool>(true);
 CheckEqualConversionToTextYson<bool>(false);
+
+ // TInstant::Now() makes this input differ from run to run; both serializers
+ // still receive the same value.
 CheckEqualConversionToTextYson<TInstant>(TInstant::Now());
 CheckEqualConversionToTextYson<TInstant>(TInstant::Zero());
 CheckEqualConversionToTextYson<TInstant>(TInstant::FromValue(42));
+
 CheckEqualConversionToTextYson<TDuration>(TDuration::Zero());
 CheckEqualConversionToTextYson<TDuration>(TDuration::Seconds(2));
 CheckEqualConversionToTextYson<TDuration>(TDuration::MilliSeconds(123));
 CheckEqualConversionToTextYson<TDuration>(TDuration::MicroSeconds(12));
+
+ // Strings with characters that need escaping: quotes, control characters
+ // and a hex escape. "\x012" is the single character 0x12, because a hex
+ // escape consumes all hex digits that follow it.
 CheckEqualConversionToTextYson<std::string>("Hello, world!");
 CheckEqualConversionToTextYson<std::string>("This is a so-called \"quotation marks\" test");
 CheckEqualConversionToTextYson<std::string>("This tests \r other \b hidden symbols \n");
 CheckEqualConversionToTextYson<std::string>("And this one tests special numbers numbers \x012");
+
+ // A freshly created guid, so this input also differs from run to run.
 CheckEqualConversionToTextYson<TGuid>(TGuid::Create());
+}
+
+// Round trip for integers: each literal is serialized to text YSON and parsed
+// back as the explicitly given type by both parsers. The literal's own type
+// (int) is what gets serialized; the template argument is only the parse
+// target.
+TEST(TTextYsonConvertTest, ConvertFromTextIntegrals)
+{
 CheckEqualConversionToFromTextYson<i8>(+15);
 CheckEqualConversionToFromTextYson<i8>(0);
 CheckEqualConversionToFromTextYson<i8>(-15);
 CheckEqualConversionToFromTextYson<i32>(+100);
 CheckEqualConversionToFromTextYson<i32>(0);
 CheckEqualConversionToFromTextYson<i32>(-123);
 CheckEqualConversionToFromTextYson<i64>(+100);
 CheckEqualConversionToFromTextYson<i64>(0);
 CheckEqualConversionToFromTextYson<i64>(-123);
+
 CheckEqualConversionToFromTextYson<ui8>(+100);
 CheckEqualConversionToFromTextYson<ui8>(0);
 CheckEqualConversionToFromTextYson<ui32>(+100);
 CheckEqualConversionToFromTextYson<ui32>(0);
 CheckEqualConversionToFromTextYson<ui64>(+100);
 CheckEqualConversionToFromTextYson<ui64>(0);
+}
+
+// Round trip at the extreme values of the 64-bit signed and unsigned integer
+// types.
+TEST(TTextYsonConvertTest, ConvertFromTextIntegralsLimits)
+{
 CheckEqualConversionToFromTextYson<i64>(std::numeric_limits<i64>::max());
 CheckEqualConversionToFromTextYson<i64>(std::numeric_limits<i64>::min());
+
 CheckEqualConversionToFromTextYson<ui64>(std::numeric_limits<ui64>::max());
 CheckEqualConversionToFromTextYson<ui64>(std::numeric_limits<ui64>::min());
+}
+
+// Round trip for ordinary finite doubles, including both signed zeros.
+// Only double is covered here; there are no float cases.
+TEST(TTextYsonConvertTest, ConvertFromTextFloats)
+{
 CheckEqualConversionToFromTextYson<double>(0.0);
 CheckEqualConversionToFromTextYson<double>(-0.0);
 CheckEqualConversionToFromTextYson<double>(-7.7777);
 CheckEqualConversionToFromTextYson<double>(+9.243);
+}
+
+// Round trip for special double values: numeric_limits min()/max() and both
+// infinities.
+TEST(TTextYsonConvertTest, ConvertFromTextFloatsSpecialValues)
+{
 CheckEqualConversionToFromTextYson<double>(std::numeric_limits<double>::min());
 CheckEqualConversionToFromTextYson<double>(std::numeric_limits<double>::max());
 CheckEqualConversionToFromTextYson<double>(std::numeric_limits<double>::infinity());
 CheckEqualConversionToFromTextYson<double>(-std::numeric_limits<double>::infinity());
+
+ // NaN is deliberately left out: NaN never compares equal to itself, so the
+ // EXPECT_EQ on parsed values inside the helper would always fail.
 // CheckEqualConversionFromTextYson<double>(std::numeric_limits<double>::quiet_NaN());
+}
+
+// Non-numeric primitives: bool, TInstant, TDuration, std::string and TGuid.
+// NOTE(review): despite the "FromText" name, every call below uses
+// CheckEqualConversionToTextYson, which only compares the two serializers and
+// never parses anything. This looks like a copy of the ToText test;
+// CheckEqualConversionToFromTextYson<T> was presumably intended -- confirm.
+TEST(TTextYsonConvertTest, ConvertFromTextOtherPrimitiveTypes)
+{
 CheckEqualConversionToTextYson<bool>(true);
 CheckEqualConversionToTextYson<bool>(false);
+ // NOTE(review): the parameter type is const bool&, so each string literal
+ // below decays to a non-null pointer and converts to `true`. All four calls
+ // therefore test the value `true`, not the spelled-out text.
 CheckEqualConversionToTextYson<bool>("true");
 CheckEqualConversionToTextYson<bool>("false");
 CheckEqualConversionToTextYson<bool>("0");
 CheckEqualConversionToTextYson<bool>("1");
+
 CheckEqualConversionToTextYson<TInstant>(TInstant::Now());
 CheckEqualConversionToTextYson<TInstant>(TInstant::Zero());
 CheckEqualConversionToTextYson<TInstant>(TInstant::FromValue(42));
+
 CheckEqualConversionToTextYson<TDuration>(TDuration::Zero());
 CheckEqualConversionToTextYson<TDuration>(TDuration::Seconds(2));
 CheckEqualConversionToTextYson<TDuration>(TDuration::MilliSeconds(123));
 CheckEqualConversionToTextYson<TDuration>(TDuration::MicroSeconds(12));
+
+ // "\x012" is the single character 0x12: a hex escape consumes all hex
+ // digits that follow it.
 CheckEqualConversionToTextYson<std::string>("Hello, world!");
 CheckEqualConversionToTextYson<std::string>("This is a so-called \"quotation marks\" test");
 CheckEqualConversionToTextYson<std::string>("This tests \r other \b hidden symbols \n");
 CheckEqualConversionToTextYson<std::string>("And this one tests special numbers numbers \x012");
+
 CheckEqualConversionToTextYson<TGuid>(TGuid::Create());
+}
+
+// Signedness mismatch: values are serialized as ui64 and then parsed as signed
+// integer types (i8, i32, i64). All inputs fit into the target type, and both
+// parsers are expected to agree on the result.
+TEST(TTextYsonConvertTest, ConvertFromTextIntegralsTypeMissmatch)
+{
 CheckEqualConversionToFromTextYson<i8>(static_cast<ui64>(+100));
 CheckEqualConversionToFromTextYson<i8>(static_cast<ui64>(0));
 CheckEqualConversionToFromTextYson<i32>(static_cast<ui64>(+100));
 CheckEqualConversionToFromTextYson<i32>(static_cast<ui64>(0));
 CheckEqualConversionToFromTextYson<i64>(static_cast<ui64>(+100));
 CheckEqualConversionToFromTextYson<i64>(static_cast<ui64>(0));
+}
+
+// Parses bool from text YSON whose literal kind is not necessarily boolean:
+// boolean literals, signed and unsigned integer literals, and string values.
+TEST(TTextYsonConvertTest, ConvertFromTextTypeMissmatch)
+{
+ // Raw text YSON passed verbatim: %-literals, integers, and "u"-suffixed
+ // integers.
 CheckEqualConversionFromTextYson<bool>("%true");
 CheckEqualConversionFromTextYson<bool>("%false");
 CheckEqualConversionFromTextYson<bool>("1");
 CheckEqualConversionFromTextYson<bool>("0");
 CheckEqualConversionFromTextYson<bool>("-0");
 CheckEqualConversionFromTextYson<bool>("1u");
 CheckEqualConversionFromTextYson<bool>("0u");
+
+ // The same words, but first serialized as YSON string values by
+ // ConvertToTextYsonString, so the parser receives string-typed YSON.
 CheckEqualConversionFromTextYson<bool>(ConvertToTextYsonString("true").AsStringBuf());
 CheckEqualConversionFromTextYson<bool>(ConvertToTextYsonString("false").AsStringBuf());
 CheckEqualConversionFromTextYson<bool>(ConvertToTextYsonString("1").AsStringBuf());
 CheckEqualConversionFromTextYson<bool>(ConvertToTextYsonString("0").AsStringBuf());
+}
+
+// Malformed or out-of-range inputs must be rejected by both parsers: every
+// case is asserted to throw from ConvertFromTextYsonString<T> and from
+// ConvertTo<T>.
+TEST(TTextYsonConvertTest, ConvertFromTextYsonStringThrowBasicCases)
+{
+ // Wraps a raw payload into a TYsonString without any escaping or quoting.
 auto fromPayload = [] (const auto& value) {
 return NYson::TYsonString(TString(value));
 };
+
 // Overflow.
 EXPECT_ANY_THROW(ConvertFromTextYsonString<i8>(fromPayload("123123123213")));
 EXPECT_ANY_THROW(ConvertTo<i8>(fromPayload("123123123213")));
+
 // Negative.
 EXPECT_ANY_THROW(ConvertFromTextYsonString<ui64>(fromPayload("-123")));
 EXPECT_ANY_THROW(ConvertTo<ui64>(fromPayload("-123")));
+
 // Non-numeric.
 EXPECT_ANY_THROW(ConvertFromTextYsonString<i64>(fromPayload("haha")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<i64>(fromPayload("123qq")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<i64>(fromPayload("-123u")));
 EXPECT_ANY_THROW(ConvertTo<i64>(fromPayload("haha")));
 EXPECT_ANY_THROW(ConvertTo<i64>(fromPayload("123qq")));
 EXPECT_ANY_THROW(ConvertTo<i64>(fromPayload("-123u")));
+
 // Big positive to bool
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("42")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("42")));
+
 // Garbage to bool
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("%falsse")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("%falsse")));
+
 // Wrong string to bool
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("\"True\"")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("\"False\"")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("\"1u\"")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<bool>(fromPayload("\"0u\"")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("\"True\"")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("\"False\"")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("\"1u\"")));
 EXPECT_ANY_THROW(ConvertTo<bool>(fromPayload("\"0u\"")));
+
 // Wrong string to string
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("\"")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("haha\"")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("\'oops\'")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("\"")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("haha\"")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("\'oops\'")));
+
 // Wrong literal to double
+ // NOTE(review): only the "%%" cases instantiate double. The "%42inf" and
+ // "%NaaN" cases instantiate std::string, so malformed %-literals are never
+ // checked against the double parser. <double> was presumably intended --
+ // confirm.
 EXPECT_ANY_THROW(ConvertFromTextYsonString<double>(fromPayload("%%")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("%42inf")));
 EXPECT_ANY_THROW(ConvertFromTextYsonString<std::string>(fromPayload("%NaaN")));
 EXPECT_ANY_THROW(ConvertTo<double>(fromPayload("%%")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("%42inf")));
 EXPECT_ANY_THROW(ConvertTo<std::string>(fromPayload("%NaaN")));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+} // namespace
+} // namespace NYT::NYTree
diff --git a/yt/yt/core/ytree/unittests/ya.make b/yt/yt/core/ytree/unittests/ya.make
index 7196cea98c..1bc54b95aa 100644
--- a/yt/yt/core/ytree/unittests/ya.make
+++ b/yt/yt/core/ytree/unittests/ya.make
@@ -10,6 +10,7 @@ SRCS(
resolver_ut.cpp
serialize_ut.cpp
service_combiner_ut.cpp
+ text_yson_convert_ut.cpp
tree_builder_ut.cpp
lazy_ypath_service_ut.cpp
yson_schema_ut.cpp
diff --git a/yt/yt/core/ytree/ypath_client.h b/yt/yt/core/ytree/ypath_client.h
index 08cb92c5a8..b9b2200baa 100644
--- a/yt/yt/core/ytree/ypath_client.h
+++ b/yt/yt/core/ytree/ypath_client.h
@@ -12,6 +12,8 @@
#include <library/cpp/yt/memory/ref.h>
+#include <library/cpp/yt/logging/logger.h>
+
namespace NYT::NYTree {
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/backtrace_introspector/http/handler.cpp b/yt/yt/library/backtrace_introspector/http/handler.cpp
deleted file mode 100644
index fe3cb65564..0000000000
--- a/yt/yt/library/backtrace_introspector/http/handler.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-#include "handler.h"
-
-#include <yt/yt/core/http/server.h>
-
-#include <yt/yt/core/concurrency/action_queue.h>
-
-#include <yt/yt/library/backtrace_introspector/introspect.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-using namespace NHttp;
-using namespace NConcurrency;
-
-////////////////////////////////////////////////////////////////////////////////
-
-class THandlerBase
- : public IHttpHandler
-{
-public:
- void HandleRequest(const IRequestPtr& /*req*/, const IResponseWriterPtr& rsp) override
- {
- try {
- static const auto queue = New<TActionQueue>("BacktraceIntro");
- auto dumpFuture = BIND(&THandlerBase::Dump, MakeStrong(this))
- .AsyncVia(queue->GetInvoker())
- .Run();
-
- auto dump = WaitFor(dumpFuture)
- .ValueOrThrow();
-
- WaitFor(rsp->WriteBody(TSharedRef::FromString(dump)))
- .ThrowOnError();
-
- WaitFor(rsp->Close())
- .ThrowOnError();
- } catch (const std::exception& ex) {
- if (!rsp->AreHeadersFlushed()) {
- rsp->SetStatus(EStatusCode::InternalServerError);
- WaitFor(rsp->WriteBody(TSharedRef::FromString(ex.what())))
- .ThrowOnError();
- }
- throw;
- }
- }
-
-protected:
- virtual TString Dump() = 0;
-};
-
-class TThreadsHandler
- : public THandlerBase
-{
-private:
- TString Dump() override
- {
- return FormatIntrospectionInfos(IntrospectThreads());
- }
-};
-
-class TFibersHandler
- : public THandlerBase
-{
-private:
- TString Dump() override
- {
- return FormatIntrospectionInfos(IntrospectFibers());
- }
-};
-
-void Register(
- const IRequestPathMatcherPtr& handlers,
- const TString& prefix)
-{
- handlers->Add(prefix + "/threads", New<TThreadsHandler>());
- handlers->Add(prefix + "/fibers", New<TFibersHandler>());
-}
-
-void Register(
- const IServerPtr& server,
- const TString& prefix)
-{
- Register(server->GetPathMatcher(), prefix);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/http/handler.h b/yt/yt/library/backtrace_introspector/http/handler.h
deleted file mode 100644
index be795b7e5d..0000000000
--- a/yt/yt/library/backtrace_introspector/http/handler.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include <yt/yt/core/http/public.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-////////////////////////////////////////////////////////////////////////////////
-
-//! Registers introspector handlers.
-void Register(
- const NHttp::IRequestPathMatcherPtr& handlers,
- const TString& prefix = {});
-
-void Register(
- const NHttp::IServerPtr& server,
- const TString& prefix = {});
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/http/ya.make b/yt/yt/library/backtrace_introspector/http/ya.make
deleted file mode 100644
index 504d20a2e3..0000000000
--- a/yt/yt/library/backtrace_introspector/http/ya.make
+++ /dev/null
@@ -1,16 +0,0 @@
-LIBRARY()
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- handler.cpp
-)
-
-PEERDIR(
- yt/yt/core
- yt/yt/core/http
-
- yt/yt/library/backtrace_introspector
-)
-
-END()
diff --git a/yt/yt/library/backtrace_introspector/introspect.cpp b/yt/yt/library/backtrace_introspector/introspect.cpp
deleted file mode 100644
index cfbd24a246..0000000000
--- a/yt/yt/library/backtrace_introspector/introspect.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-#include "introspect.h"
-
-#include "private.h"
-
-#include <yt/yt/core/misc/collection_helpers.h>
-#include <yt/yt/core/misc/finally.h>
-#include <yt/yt/core/misc/proc.h>
-
-#include <yt/yt/core/concurrency/fiber.h>
-#include <yt/yt/core/concurrency/scheduler_api.h>
-
-#include <yt/yt/core/tracing/trace_context.h>
-
-#include <library/cpp/yt/memory/safe_memory_reader.h>
-
-#include <library/cpp/yt/backtrace/backtrace.h>
-
-#include <library/cpp/yt/backtrace/cursors/libunwind/libunwind_cursor.h>
-
-#include <library/cpp/yt/backtrace/cursors/frame_pointer/frame_pointer_cursor.h>
-
-#include <library/cpp/yt/backtrace/cursors/interop/interop.h>
-
-#include <util/system/yield.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-using namespace NConcurrency;
-using namespace NThreading;
-using namespace NTracing;
-using namespace NBacktrace;
-
-////////////////////////////////////////////////////////////////////////////////
-
-static constexpr auto& Logger = BacktraceIntrospectorLogger;
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::vector<TFiberIntrospectionInfo> IntrospectFibers()
-{
- YT_LOG_INFO("Fiber introspection started");
-
- YT_LOG_INFO("Collecting waiting fibers backtraces");
-
- std::vector<TFiberIntrospectionInfo> infos;
- THashSet<TFiberId> waitingFiberIds;
- THashMap<TFiberId, EFiberState> fiberStates;
-
- auto introspectionAction = [&] (NYT::NConcurrency::TFiber::TFiberList& fibers) {
- for (auto& fiberRef : fibers) {
- auto* fiber = fiberRef.AsFiber();
-
- auto fiberId = fiber->GetFiberId();
- if (fiberId == InvalidFiberId) {
- continue;
- }
-
- EmplaceOrCrash(fiberStates, fiberId, EFiberState::Introspecting);
-
- EFiberState state;
-
- auto onIntrospectionLockAcquired = [&] {
- YT_LOG_DEBUG("Waiting fiber is successfully locked for introspection (FiberId: %x)",
- fiberId);
-
- const auto& propagatingStorage = *NConcurrency::TryGetPropagatingStorage(*fiber->GetFls());
- const auto* traceContext = TryGetTraceContextFromPropagatingStorage(propagatingStorage);
-
- TFiberIntrospectionInfo info{
- .State = EFiberState::Waiting,
- .FiberId = fiberId,
- .WaitingSince = fiber->GetWaitingSince(),
- .TraceId = traceContext ? traceContext->GetTraceId() : TTraceId(),
- .TraceLoggingTag = traceContext ? traceContext->GetLoggingTag() : TString(),
- };
-
- auto optionalContext = TrySynthesizeLibunwindContextFromMachineContext(*fiber->GetMachineContext());
- if (!optionalContext) {
- YT_LOG_WARNING("Failed to synthesize libunwind context (FiberId: %x)",
- fiberId);
- return;
- }
-
- TLibunwindCursor cursor(*optionalContext);
- while (!cursor.IsFinished()) {
- info.Backtrace.push_back(cursor.GetCurrentIP());
- cursor.MoveNext();
- }
-
- infos.push_back(std::move(info));
- InsertOrCrash(waitingFiberIds, fiberId);
-
- YT_LOG_DEBUG("Fiber introspection completed (FiberId: %x)",
- info.FiberId);
- };
- if (!fiber->TryLockForIntrospection(&state, onIntrospectionLockAcquired)) {
- YT_LOG_DEBUG("Failed to lock fiber for introspection (FiberId: %x, State: %v)",
- fiberId,
- state);
- fiberStates[fiberId] = state;
- }
- }
- };
-
- TFiber::ReadFibers(introspectionAction);
-
- YT_LOG_INFO("Collecting running fibers backtraces");
-
- THashSet<TFiberId> runningFiberIds;
- for (auto& info : IntrospectThreads()) {
- if (info.FiberId == InvalidFiberId) {
- continue;
- }
-
- if (waitingFiberIds.contains(info.FiberId)) {
- continue;
- }
-
- if (!runningFiberIds.insert(info.FiberId).second) {
- continue;
- }
-
- infos.push_back(TFiberIntrospectionInfo{
- .State = EFiberState::Running,
- .FiberId = info.FiberId,
- .ThreadId = info.ThreadId,
- .ThreadName = std::move(info.ThreadName),
- .TraceId = info.TraceId,
- .TraceLoggingTag = std::move(info.TraceLoggingTag),
- .Backtrace = std::move(info.Backtrace),
- });
- }
-
- for (const auto& [fiberId, fiberState] : fiberStates) {
- if (fiberId == InvalidFiberId) {
- continue;
- }
- if (runningFiberIds.contains(fiberId)) {
- continue;
- }
- if (waitingFiberIds.contains(fiberId)) {
- continue;
- }
-
- infos.push_back(TFiberIntrospectionInfo{
- .State = fiberState,
- .FiberId = fiberId,
- });
- }
-
- YT_LOG_INFO("Fiber introspection completed");
-
- return infos;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-void FormatBacktrace(TStringBuilder* builder, const std::vector<const void*>& backtrace)
-{
- if (!backtrace.empty()) {
- builder->AppendString("Backtrace:\n");
- SymbolizeBacktrace(
- TRange(backtrace),
- [&] (TStringBuf str) {
- builder->AppendFormat(" %v", str);
- });
- }
-}
-
-} // namespace
-
-TString FormatIntrospectionInfos(const std::vector<TThreadIntrospectionInfo>& infos)
-{
- TStringBuilder builder;
- for (const auto& info : infos) {
- builder.AppendFormat("Thread id: %v\n", info.ThreadId);
- builder.AppendFormat("Thread name: %v\n", info.ThreadName);
- if (info.FiberId != InvalidFiberId) {
- builder.AppendFormat("Fiber id: %x\n", info.FiberId);
- }
- if (info.TraceId) {
- builder.AppendFormat("Trace id: %v\n", info.TraceId);
- }
- if (info.TraceLoggingTag) {
- builder.AppendFormat("Trace logging tag: %v\n", info.TraceLoggingTag);
- }
- FormatBacktrace(&builder, info.Backtrace);
- builder.AppendString("\n");
- }
- return builder.Flush();
-}
-
-TString FormatIntrospectionInfos(const std::vector<TFiberIntrospectionInfo>& infos)
-{
- TStringBuilder builder;
- for (const auto& info : infos) {
- builder.AppendFormat("Fiber id: %x\n", info.FiberId);
- builder.AppendFormat("State: %v\n", info.State);
- if (info.WaitingSince) {
- builder.AppendFormat("Waiting since: %v\n", info.WaitingSince);
- }
- if (info.ThreadId != InvalidThreadId) {
- builder.AppendFormat("Thread id: %v\n", info.ThreadId);
- }
- if (!info.ThreadName.empty()) {
- builder.AppendFormat("Thread name: %v\n", info.ThreadName);
- }
- if (info.TraceId) {
- builder.AppendFormat("Trace id: %v\n", info.TraceId);
- }
- if (info.TraceLoggingTag) {
- builder.AppendFormat("Trace logging tag: %v\n", info.TraceLoggingTag);
- }
- FormatBacktrace(&builder, info.Backtrace);
- builder.AppendString("\n");
- }
- return builder.Flush();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/introspect.h b/yt/yt/library/backtrace_introspector/introspect.h
deleted file mode 100644
index 2be09d2ec8..0000000000
--- a/yt/yt/library/backtrace_introspector/introspect.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/concurrency/public.h>
-
-#include <yt/yt/core/threading/public.h>
-
-#include <yt/yt/core/tracing/public.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-////////////////////////////////////////////////////////////////////////////////
-// Thread introspection API
-
-struct TThreadIntrospectionInfo
-{
- NThreading::TThreadId ThreadId;
- NConcurrency::TFiberId FiberId;
- TString ThreadName;
- NTracing::TTraceId TraceId;
- //! Empty if no trace context is known.
- TString TraceLoggingTag;
- std::vector<const void*> Backtrace;
-};
-
-std::vector<TThreadIntrospectionInfo> IntrospectThreads();
-
-////////////////////////////////////////////////////////////////////////////////
-// Fiber introspection API
-
-struct TFiberIntrospectionInfo
-{
- NConcurrency::EFiberState State;
- NConcurrency::TFiberId FiberId;
- //! Zero if fiber is not waiting.
- TInstant WaitingSince;
- //! |InvalidThreadId| is fiber is not running.
- NThreading::TThreadId ThreadId;
- //! Empty if fiber is not running.
- TString ThreadName;
- NTracing::TTraceId TraceId;
- //! Empty if no trace context is known.
- TString TraceLoggingTag;
- std::vector<const void*> Backtrace;
-};
-
-std::vector<TFiberIntrospectionInfo> IntrospectFibers();
-
-////////////////////////////////////////////////////////////////////////////////
-
-TString FormatIntrospectionInfos(const std::vector<TThreadIntrospectionInfo>& infos);
-TString FormatIntrospectionInfos(const std::vector<TFiberIntrospectionInfo>& infos);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/introspect_dummy.cpp b/yt/yt/library/backtrace_introspector/introspect_dummy.cpp
deleted file mode 100644
index e29293c7f5..0000000000
--- a/yt/yt/library/backtrace_introspector/introspect_dummy.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "introspect.h"
-
-namespace NYT::NBacktraceIntrospector {
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::vector<TThreadIntrospectionInfo> IntrospectThreads()
-{
- return {};
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/introspect_linux.cpp b/yt/yt/library/backtrace_introspector/introspect_linux.cpp
deleted file mode 100644
index f2fdf1e8c2..0000000000
--- a/yt/yt/library/backtrace_introspector/introspect_linux.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-#include "introspect.h"
-
-#include "private.h"
-
-#include <yt/yt/core/misc/finally.h>
-#include <yt/yt/core/misc/proc.h>
-
-#include <yt/yt/core/concurrency/fiber.h>
-#include <yt/yt/core/concurrency/scheduler_api.h>
-
-#include <yt/yt/core/tracing/trace_context.h>
-
-#include <library/cpp/yt/memory/safe_memory_reader.h>
-
-#include <library/cpp/yt/backtrace/backtrace.h>
-
-#include <library/cpp/yt/backtrace/cursors/libunwind/libunwind_cursor.h>
-
-#include <library/cpp/yt/backtrace/cursors/frame_pointer/frame_pointer_cursor.h>
-
-#include <library/cpp/yt/backtrace/cursors/interop/interop.h>
-
-#include <library/cpp/yt/misc/thread_name.h>
-
-#include <util/system/yield.h>
-
-#include <sys/syscall.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-using namespace NConcurrency;
-using namespace NTracing;
-using namespace NBacktrace;
-
-////////////////////////////////////////////////////////////////////////////////
-
-static constexpr auto& Logger = BacktraceIntrospectorLogger;
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-struct TStaticString
-{
- TStaticString() = default;
-
- explicit TStaticString(TStringBuf str)
- {
- Length = std::min(std::ssize(str), std::ssize(Buffer));
- std::copy(str.data(), str.data() + Length, Buffer.data());
- }
-
- operator TString() const
- {
- return TString(Buffer.data(), static_cast<size_t>(Length));
- }
-
- std::array<char, 256> Buffer;
- int Length = 0;
-};
-
-struct TStaticBacktrace
-{
- operator std::vector<const void*>() const
- {
- return std::vector<const void*>(Frames.data(), Frames.data() + FrameCount);
- }
-
- std::array<const void*, 100> Frames;
- int FrameCount = 0;
-};
-
-struct TSignalHandlerContext
-{
- TSignalHandlerContext();
- ~TSignalHandlerContext();
-
- std::atomic<bool> Finished = false;
-
- TFiberId FiberId = {};
- TTraceId TraceId = {};
- TStaticString TraceLoggingTag;
- TStaticBacktrace Backtrace;
- TThreadName ThreadName = {};
-
- TSafeMemoryReader* MemoryReader = Singleton<TSafeMemoryReader>();
-
- void SetFinished()
- {
- Finished.store(true);
- }
-
- void WaitUntilFinished()
- {
- while (!Finished.load()) {
- ThreadYield();
- }
- }
-};
-
-static TSignalHandlerContext* SignalHandlerContext;
-
-TSignalHandlerContext::TSignalHandlerContext()
-{
- YT_VERIFY(!SignalHandlerContext);
- SignalHandlerContext = this;
-}
-
-TSignalHandlerContext::~TSignalHandlerContext()
-{
- YT_VERIFY(SignalHandlerContext == this);
- SignalHandlerContext = nullptr;
-}
-
-void SignalHandler(int sig, siginfo_t* /*info*/, void* threadContext)
-{
- YT_VERIFY(sig == SIGUSR1);
-
- SignalHandlerContext->FiberId = GetCurrentFiberId();
- SignalHandlerContext->ThreadName = GetCurrentThreadName();
- if (const auto* traceContext = TryGetCurrentTraceContext()) {
- SignalHandlerContext->TraceId = traceContext->GetTraceId();
- SignalHandlerContext->TraceLoggingTag = TStaticString(traceContext->GetLoggingTag());
- }
-
- auto cursorContext = FramePointerCursorContextFromUcontext(*static_cast<const ucontext_t*>(threadContext));
- TFramePointerCursor cursor(SignalHandlerContext->MemoryReader, cursorContext);
- while (!cursor.IsFinished() && SignalHandlerContext->Backtrace.FrameCount < std::ssize(SignalHandlerContext->Backtrace.Frames)) {
- SignalHandlerContext->Backtrace.Frames[SignalHandlerContext->Backtrace.FrameCount++] = cursor.GetCurrentIP();
- cursor.MoveNext();
- }
-
- SignalHandlerContext->SetFinished();
-}
-
-} // namespace
-
-std::vector<TThreadIntrospectionInfo> IntrospectThreads()
-{
- static std::atomic<bool> IntrospectionLock;
-
- if (IntrospectionLock.exchange(true)) {
- THROW_ERROR_EXCEPTION("Thread introspection is already in progress");
- }
-
- auto introspectionLockGuard = Finally([] {
- YT_VERIFY(IntrospectionLock.exchange(false));
- });
-
- YT_LOG_INFO("Thread introspection started");
-
- {
- struct sigaction action;
- action.sa_flags = SA_SIGINFO | SA_RESTART;
- ::sigemptyset(&action.sa_mask);
- action.sa_sigaction = SignalHandler;
-
- if (::sigaction(SIGUSR1, &action, nullptr) != 0) {
- THROW_ERROR_EXCEPTION("Failed to install signal handler")
- << TError::FromSystem();
- }
- }
-
- std::vector<TThreadIntrospectionInfo> infos;
- for (auto threadId : GetCurrentProcessThreadIds()) {
- if (!IsUserspaceThread(threadId)) {
- YT_LOG_DEBUG("Skipping a non-userspace thread (ThreadId: %v)",
- threadId);
- continue;
- }
-
- TSignalHandlerContext signalHandlerContext;
- if (::syscall(SYS_tkill, threadId, SIGUSR1) != 0) {
- YT_LOG_DEBUG(TError::FromSystem(), "Failed to signal to thread (ThreadId: %v)",
- threadId);
- continue;
- }
-
- YT_LOG_DEBUG("Sent signal to thread (ThreadId: %v)",
- threadId);
-
- signalHandlerContext.WaitUntilFinished();
-
- YT_LOG_DEBUG("Signal handler finished (ThreadId: %v, FiberId: %x)",
- threadId,
- signalHandlerContext.FiberId);
-
- infos.push_back(TThreadIntrospectionInfo{
- .ThreadId = threadId,
- .FiberId = signalHandlerContext.FiberId,
- .ThreadName = TString(signalHandlerContext.ThreadName.Buffer.data(), static_cast<size_t>(signalHandlerContext.ThreadName.Length)),
- .TraceId = signalHandlerContext.TraceId,
- .TraceLoggingTag = signalHandlerContext.TraceLoggingTag,
- .Backtrace = signalHandlerContext.Backtrace,
- });
- }
-
- {
- struct sigaction action;
- action.sa_flags = SA_RESTART;
- ::sigemptyset(&action.sa_mask);
- action.sa_handler = SIG_IGN;
-
- if (::sigaction(SIGUSR1, &action, nullptr) != 0) {
- THROW_ERROR_EXCEPTION("Failed to de-install signal handler")
- << TError::FromSystem();
- }
- }
-
- YT_LOG_INFO("Thread introspection completed");
-
- return infos;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/private.h b/yt/yt/library/backtrace_introspector/private.h
deleted file mode 100644
index 3f99c307a5..0000000000
--- a/yt/yt/library/backtrace_introspector/private.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/logging/log.h>
-
-namespace NYT::NBacktraceIntrospector {
-
-////////////////////////////////////////////////////////////////////////////////
-
-YT_DEFINE_GLOBAL(const NLogging::TLogger, BacktraceIntrospectorLogger, "BacktraceIntrospector");
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
-
diff --git a/yt/yt/library/backtrace_introspector/public.h b/yt/yt/library/backtrace_introspector/public.h
deleted file mode 100644
index 54a8bd06ed..0000000000
--- a/yt/yt/library/backtrace_introspector/public.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-namespace NYT::NBacktraceIntrospector {
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TThreadIntrospectionInfo;
-struct TFiberIntrospectionInfo;
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/unittests/introspect_ut.cpp b/yt/yt/library/backtrace_introspector/unittests/introspect_ut.cpp
deleted file mode 100644
index a939417958..0000000000
--- a/yt/yt/library/backtrace_introspector/unittests/introspect_ut.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/library/backtrace_introspector/introspect.h>
-
-#include <yt/yt/core/concurrency/action_queue.h>
-#include <yt/yt/core/concurrency/delayed_executor.h>
-
-#include <yt/yt/core/actions/bind.h>
-#include <yt/yt/core/actions/future.h>
-
-#include <yt/yt/core/tracing/trace_context.h>
-
-#include <yt/yt/core/logging/log.h>
-
-#include <yt/yt/core/misc/collection_helpers.h>
-
-namespace NYT::NBacktraceIntrospector {
-namespace {
-
-using namespace NConcurrency;
-using namespace NTracing;
-
-////////////////////////////////////////////////////////////////////////////////
-
-NLogging::TLogger Logger("Test");
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TBacktraceIntrospectorTest, Fibers)
-{
- constexpr int HeavyQueueCount = 5;
- std::vector<TActionQueuePtr> heavyQueues;
- const TString HeavyThreadNamePrefix("Heavy:");
- for (int index = 0; index < HeavyQueueCount; ++index) {
- heavyQueues.push_back(New<TActionQueue>(HeavyThreadNamePrefix + ToString(index)));
- }
-
- constexpr int LightQueueCount = 3;
- std::vector<TActionQueuePtr> lightQueues;
- const TString LightThreadNamePrefix("Light:");
- for (int index = 0; index < LightQueueCount; ++index) {
- lightQueues.push_back(New<TActionQueue>(LightThreadNamePrefix + ToString(index)));
- }
-
- constexpr int HeavyCallbackCount = 3;
- std::vector<TTraceContextPtr> heavyTraceContexts;
- std::set<TTraceId> expectedHeavyTraceIds;
- for (int index = 0; index < HeavyCallbackCount; ++index) {
- auto traceContext = TTraceContext::NewRoot("Heavy");
- traceContext->SetLoggingTag(Format("HeavyLoggingTag:%v", index));
- heavyTraceContexts.push_back(traceContext);
- InsertOrCrash(expectedHeavyTraceIds, traceContext->GetTraceId());
- }
-
- std::vector<TFuture<void>> heavyFutures;
- for (int index = 0; index < HeavyCallbackCount; ++index) {
- heavyFutures.push_back(
- BIND([&, index] {
- TTraceContextGuard traceContextGuard(heavyTraceContexts[index]);
- YT_LOG_INFO("Heavy callback started (Index: %v)", index);
- Sleep(TDuration::Seconds(3));
- YT_LOG_INFO("Heavy callback finished (Index: %v)", index);
- })
- .AsyncVia(heavyQueues[index % HeavyQueueCount]->GetInvoker())
- .Run());
- }
-
- constexpr int LightCallbackCount = 10;
- std::vector<TTraceContextPtr> lightTraceContexts;
- std::set<TTraceId> expectedLightTraceIds;
- for (int index = 0; index < LightCallbackCount; ++index) {
- auto traceContext = TTraceContext::NewRoot("Light");
- traceContext->SetLoggingTag(Format("LightLoggingTag:%v", index));
- lightTraceContexts.push_back(traceContext);
- InsertOrCrash(expectedLightTraceIds, traceContext->GetTraceId());
- }
-
- std::vector<TFuture<void>> lightFutures;
- for (int index = 0; index < LightCallbackCount; ++index) {
- lightFutures.push_back(
- BIND([&, index] {
- TTraceContextGuard traceContextGuard(lightTraceContexts[index]);
- YT_LOG_INFO("Light callback started (Index: %v)", index);
- TDelayedExecutor::WaitForDuration(TDuration::Seconds(1));
- YT_LOG_INFO("Light callback finished (Index: %v)", index);
- })
- .AsyncVia(lightQueues[index % LightQueueCount]->GetInvoker())
- .Run());
- }
-
- Sleep(TDuration::MilliSeconds(100));
-
- auto infos = IntrospectFibers();
- Cerr << FormatIntrospectionInfos(infos);
-
- std::set<TTraceId> actualHeavyTraceIds;
- std::set<TTraceId> actualLightTraceIds;
- for (const auto& info : infos) {
- if (!info.TraceId) {
- continue;
- }
- switch (info.State) {
- case EFiberState::Running:
- EXPECT_TRUE(actualHeavyTraceIds.insert(info.TraceId).second);
- if (expectedHeavyTraceIds.contains(info.TraceId)) {
- EXPECT_TRUE(info.ThreadName.StartsWith(HeavyThreadNamePrefix));
- }
- break;
-
- case EFiberState::Waiting:
- EXPECT_TRUE(actualLightTraceIds.insert(info.TraceId).second);
- break;
-
- default:
- break;
- }
- }
-
- EXPECT_EQ(expectedLightTraceIds, actualLightTraceIds);
- EXPECT_EQ(expectedHeavyTraceIds, actualHeavyTraceIds);
-
- for (const auto& future : heavyFutures) {
- future.Get().ThrowOnError();
- }
-
- for (const auto& future : lightFutures) {
- future.Get().ThrowOnError();
- }
-
- for (const auto& queue : heavyQueues) {
- queue->Shutdown(/*graceful*/ true);
- }
- for (const auto& queue : lightQueues) {
- queue->Shutdown(/*graceful*/ true);
- }
-}
-
-TEST(TBacktraceIntrospectorTest, Threads)
-{
- constexpr int QueueCount = 5;
- std::vector<TActionQueuePtr> queues;
- const TString ThreadNamePrefix("Queue:");
- for (int index = 0; index < QueueCount; ++index) {
- queues.push_back(New<TActionQueue>(ThreadNamePrefix + ToString(index)));
- }
-
- constexpr int CallbackCount = 3;
- std::vector<TTraceContextPtr> traceContexts;
- std::set<TTraceId> expectedTraceIds;
- for (int index = 0; index < CallbackCount; ++index) {
- auto traceContext = TTraceContext::NewRoot("Heavy");
- traceContexts.push_back(traceContext);
- InsertOrCrash(expectedTraceIds, traceContext->GetTraceId());
- }
-
- std::vector<TFuture<void>> futures;
- for (int index = 0; index < CallbackCount; ++index) {
- futures.push_back(
- BIND([&, index] {
- TTraceContextGuard traceContextGuard(traceContexts[index]);
- YT_LOG_INFO("Callback started (Index: %v)", index);
- Sleep(TDuration::Seconds(3));
- YT_LOG_INFO("Callback finished (Index: %v)", index);
- })
- .AsyncVia(queues[index % QueueCount]->GetInvoker())
- .Run());
- }
-
- Sleep(TDuration::MilliSeconds(100));
-
- auto infos = IntrospectThreads();
- Cerr << FormatIntrospectionInfos(infos);
-
- std::set<TTraceId> actualTraceIds;
- for (const auto& info : infos) {
- if (!info.TraceId) {
- continue;
- }
- EXPECT_TRUE(actualTraceIds.insert(info.TraceId).second);
- if (expectedTraceIds.contains(info.TraceId)) {
- EXPECT_TRUE(info.ThreadName.StartsWith(ThreadNamePrefix));
- }
- }
-
- EXPECT_EQ(expectedTraceIds, actualTraceIds);
-
- for (const auto& future : futures) {
- future.Get().ThrowOnError();
- }
- for (const auto& queue : queues) {
- queue->Shutdown(/*graceful*/ true);
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NBacktraceIntrospector
diff --git a/yt/yt/library/backtrace_introspector/unittests/ya.make b/yt/yt/library/backtrace_introspector/unittests/ya.make
deleted file mode 100644
index 393215d01e..0000000000
--- a/yt/yt/library/backtrace_introspector/unittests/ya.make
+++ /dev/null
@@ -1,15 +0,0 @@
-GTEST()
-
-SRCS(
- introspect_ut.cpp
-)
-
-INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
-
-PEERDIR(
- yt/yt/library/backtrace_introspector
-
- yt/yt/core/test_framework
-)
-
-END()
diff --git a/yt/yt/library/backtrace_introspector/ya.make b/yt/yt/library/backtrace_introspector/ya.make
deleted file mode 100644
index 884b8fb562..0000000000
--- a/yt/yt/library/backtrace_introspector/ya.make
+++ /dev/null
@@ -1,31 +0,0 @@
-LIBRARY()
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- introspect.cpp
-)
-IF (OS_LINUX)
- SRCS(introspect_linux.cpp)
-ELSE()
- SRCS(introspect_dummy.cpp)
-ENDIF()
-
-PEERDIR(
- yt/yt/core
-
- library/cpp/yt/backtrace/cursors/interop
- library/cpp/yt/backtrace/cursors/libunwind
- library/cpp/yt/backtrace/cursors/frame_pointer
- library/cpp/yt/misc
-)
-
-END()
-
-RECURSE(
- http
-)
-
-RECURSE_FOR_TESTS(
- unittests
-)
diff --git a/yt/yt/library/formats/arrow_parser.cpp b/yt/yt/library/formats/arrow_parser.cpp
index c3c169352b..7c112716e5 100644
--- a/yt/yt/library/formats/arrow_parser.cpp
+++ b/yt/yt/library/formats/arrow_parser.cpp
@@ -245,6 +245,7 @@ private:
void ParseSimpleNumeric(FuncType makeUnversionedValueFunc)
{
auto array = std::static_pointer_cast<ArrayType>(Array_);
+ YT_VERIFY(array->length() <= std::ssize(*RowValues_));
for (int rowIndex = 0; rowIndex < array->length(); ++rowIndex) {
if (array->IsNull(rowIndex)) {
(*RowValues_)[rowIndex] = MakeUnversionedNullValue(ColumnId_);
@@ -258,6 +259,7 @@ private:
arrow::Status ParseStringLikeArray(auto makeUnversionedValueFunc)
{
auto array = std::static_pointer_cast<ArrayType>(Array_);
+ YT_VERIFY(array->length() <= std::ssize(*RowValues_));
for (int rowIndex = 0; rowIndex < array->length(); ++rowIndex) {
if (array->IsNull(rowIndex)) {
(*RowValues_)[rowIndex] = MakeUnversionedNullValue(ColumnId_);
@@ -295,6 +297,7 @@ private:
arrow::Status ParseBoolean()
{
auto array = std::static_pointer_cast<arrow::BooleanArray>(Array_);
+ YT_VERIFY(array->length() <= std::ssize(*RowValues_));
for (int rowIndex = 0; rowIndex < array->length(); rowIndex++) {
if (array->IsNull(rowIndex)) {
(*RowValues_)[rowIndex] = MakeUnversionedNullValue(ColumnId_);
@@ -308,6 +311,7 @@ private:
arrow::Status ParseNull()
{
auto array = std::static_pointer_cast<arrow::NullArray>(Array_);
+ YT_VERIFY(array->length() <= std::ssize(*RowValues_));
for (int rowIndex = 0; rowIndex < array->length(); rowIndex++) {
(*RowValues_)[rowIndex] = MakeUnversionedNullValue(ColumnId_);
}
@@ -834,19 +838,21 @@ void PrepareArrayForSimpleLogicalType(
{
CheckMatchingArrowTypes(columnType, column);
if (column->type()->id() == arrow::Type::DICTIONARY) {
- auto dictionaryColumn = std::static_pointer_cast<arrow::DictionaryArray>(column);
- TUnversionedRowValues dictionaryValues(rowsValues[columnIndex].size());
- auto dictionaryValuesColumn = dictionaryColumn->dictionary();
- CheckMatchingArrowTypes(columnType, dictionaryValuesColumn);
+ auto dictionaryArrayColumn = std::static_pointer_cast<arrow::DictionaryArray>(column);
+ auto dictionary = dictionaryArrayColumn->dictionary();
+ TUnversionedRowValues dictionaryValues(dictionary->length());
+ CheckMatchingArrowTypes(columnType, dictionary);
- TArraySimpleVisitor visitor(columnType, columnId, dictionaryValuesColumn, bufferForStringLikeValues, &dictionaryValues);
- ThrowOnError(dictionaryColumn->dictionary()->type()->Accept(&visitor));
+ TArraySimpleVisitor visitor(columnType, columnId, dictionary, bufferForStringLikeValues, &dictionaryValues);
+ ThrowOnError(dictionaryArrayColumn->dictionary()->type()->Accept(&visitor));
for (int offset = 0; offset < std::ssize(rowsValues[columnIndex]); offset++) {
- if (dictionaryColumn->IsNull(offset)) {
+ if (dictionaryArrayColumn->IsNull(offset)) {
rowsValues[columnIndex][offset] = MakeUnversionedNullValue(columnId);
} else {
- rowsValues[columnIndex][offset] = dictionaryValues[dictionaryColumn->GetValueIndex(offset)];
+ auto dictionaryValueIndex = dictionaryArrayColumn->GetValueIndex(offset);
+ YT_VERIFY(dictionaryValueIndex < std::ssize(dictionaryValues));
+ rowsValues[columnIndex][offset] = dictionaryValues[dictionaryValueIndex];
}
}
} else {
diff --git a/yt/yt/library/formats/unittests/arrow_parser_ut.cpp b/yt/yt/library/formats/unittests/arrow_parser_ut.cpp
new file mode 100644
index 0000000000..4e960edb08
--- /dev/null
+++ b/yt/yt/library/formats/unittests/arrow_parser_ut.cpp
@@ -0,0 +1,690 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include "row_helpers.h"
+
+#include <yt/yt/library/formats/arrow_parser.h>
+
+#include <yt/yt/client/formats/config.h>
+#include <yt/yt/client/formats/parser.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/validate_logical_type.h>
+#include <yt/yt/library/formats/format.h>
+
+#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h>
+#include <contrib/libs/apache/arrow/cpp/src/arrow/io/api.h>
+#include <contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h>
+#include <contrib/libs/apache/arrow/cpp/src/arrow/ipc/api.h>
+#include <contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h>
+
+namespace NYT {
+
+namespace {
+
+using namespace NFormats;
+using namespace NTableClient;
+using namespace NYTree;
+using namespace NYson;
+
+using namespace std::string_literals;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Returns four zero bytes; the tests below put them between two concatenated Arrow streams.
+std::string GetEos()
+{
+    return std::string(4, '\0');
+}
+
+// Test-only guard for Arrow calls: YT_VERIFY fires when #status is not OK.
+void Verify(const arrow::Status& status)
+{
+ YT_VERIFY(status.ok());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Writes #recordBatch through an Arrow IPC stream writer into an in-memory buffer
+// and returns the buffer contents as a string.
+std::string MakeOutputFromRecordBatch(const std::shared_ptr<arrow::RecordBatch>& recordBatch)
+{
+    auto sink = arrow::io::BufferOutputStream::Create().ValueOrDie();
+    auto streamWriter = arrow::ipc::MakeStreamWriter(sink, recordBatch->schema()).ValueOrDie();
+    Verify(streamWriter->WriteRecordBatch(*recordBatch));
+    return sink->Finish().ValueOrDie()->ToString();
+}
+
+// Builds an Arrow stream with a single int8 column named "integer" filled from #data.
+std::string MakeIntegerArrow(const std::vector<int8_t>& data)
+{
+    arrow::Int8Builder int8Builder;
+    for (auto item : data) {
+        Verify(int8Builder.Append(item));
+    }
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {int8Builder.Finish().ValueOrDie()};
+    auto schema = arrow::schema({arrow::field("integer", arrow::int8())});
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with a single int8 column named "opt" whose rows are 1, null, null.
+std::string MakeOptionalIntegerArrow()
+{
+    arrow::Int8Builder int8Builder;
+    Verify(int8Builder.Append(1));
+    for (int nullIndex = 0; nullIndex < 2; ++nullIndex) {
+        Verify(int8Builder.AppendNull());
+    }
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {int8Builder.Finish().ValueOrDie()};
+    auto schema = arrow::schema({arrow::field("opt", arrow::int8())});
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with a single boolean column named "bool" filled from #data.
+std::string MakeBooleanArrow(const std::vector<bool>& data)
+{
+    arrow::BooleanBuilder booleanBuilder;
+    for (bool flag : data) {
+        Verify(booleanBuilder.Append(flag));
+    }
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {booleanBuilder.Finish().ValueOrDie()};
+    auto schema = arrow::schema({arrow::field("bool", arrow::boolean())});
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with two columns: "integer" (int8) filled from #data and
+// "string" filled from #stringData. Both inputs must have the same length.
+std::string MakeIntAndStringArrow(const std::vector<int8_t>& data, const std::vector<std::string>& stringData)
+{
+    // The batch row count is taken from the first column only, so a length mismatch
+    // would otherwise go unnoticed and produce an inconsistent batch.
+    YT_VERIFY(data.size() == stringData.size());
+
+    arrow::Int8Builder builder;
+    for (auto value : data) {
+        Verify(builder.Append(value));
+    }
+    auto intArray = builder.Finish();
+
+    arrow::StringBuilder stringBuilder;
+    for (const auto& value : stringData) {
+        Verify(stringBuilder.Append(value));
+    }
+    auto stringArray = stringBuilder.Finish();
+
+    // NOTE(review): the field is declared as binary while the values come from a
+    // StringBuilder; presumably intentional for the parser test -- confirm.
+    auto arrowSchema = arrow::schema({
+        arrow::field("integer", arrow::int8()),
+        arrow::field("string", arrow::binary()),
+    });
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {*intArray, *stringArray};
+    auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns);
+
+    return MakeOutputFromRecordBatch(recordBatch);
+}
+
+// Builds an Arrow stream with a single column "list" of int32 lists;
+// std::nullopt entries of #data become null rows.
+std::string MakeIntListArrow(const std::vector<std::optional<std::vector<int32_t>>>& data)
+{
+    auto* memoryPool = arrow::default_memory_pool();
+    auto itemBuilder = std::make_shared<arrow::Int32Builder>(memoryPool);
+    auto listBuilder = std::make_unique<arrow::ListBuilder>(memoryPool, itemBuilder);
+
+    for (const auto& maybeList : data) {
+        if (!maybeList) {
+            Verify(listBuilder->AppendNull());
+            continue;
+        }
+        // Append() opens a new list slot; the items are then pushed into the child builder.
+        Verify(listBuilder->Append());
+        for (auto item : *maybeList) {
+            Verify(itemBuilder->Append(item));
+        }
+    }
+
+    auto schema = arrow::schema({arrow::field("list", listBuilder->type())});
+
+    std::shared_ptr<arrow::Array> listColumn;
+    Verify(listBuilder->Finish(&listColumn));
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {listColumn};
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with a single column "list" of string lists, one row per entry of #data.
+std::string MakeStringListArrow(const std::vector<std::vector<std::string>>& data)
+{
+    auto* memoryPool = arrow::default_memory_pool();
+    auto itemBuilder = std::make_shared<arrow::StringBuilder>(memoryPool);
+    auto listBuilder = std::make_unique<arrow::ListBuilder>(memoryPool, itemBuilder);
+
+    for (const auto& row : data) {
+        // Append() opens a new list slot; the items are then pushed into the child builder.
+        Verify(listBuilder->Append());
+        for (const auto& item : row) {
+            Verify(itemBuilder->Append(item));
+        }
+    }
+
+    auto schema = arrow::schema({arrow::field("list", listBuilder->type())});
+
+    std::shared_ptr<arrow::Array> listColumn;
+    Verify(listBuilder->Finish(&listColumn));
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {listColumn};
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with a single column "map" of int32 -> int32 maps.
+// Row i consists of the pairs (key[i][j], value[i][j]); #key and #value must have
+// identical shapes.
+std::string MakeMapArrow(const std::vector<std::vector<int32_t>>& key, const std::vector<std::vector<int32_t>>& value)
+{
+    // #value is indexed with positions taken from #key, so the shapes must agree.
+    YT_VERIFY(key.size() == value.size());
+
+    auto* pool = arrow::default_memory_pool();
+
+    auto keyBuilder = std::make_shared<arrow::Int32Builder>(pool);
+    auto valueBuilder = std::make_shared<arrow::Int32Builder>(pool);
+    auto mapBuilder = std::make_unique<arrow::MapBuilder>(pool, keyBuilder, valueBuilder);
+
+    for (ssize_t mapIndex = 0; mapIndex < std::ssize(key); mapIndex++) {
+        const auto& mapKeys = key[mapIndex];
+        const auto& mapValues = value[mapIndex];
+        YT_VERIFY(mapKeys.size() == mapValues.size());
+
+        Verify(mapBuilder->Append());
+        for (ssize_t valueNumber = 0; valueNumber < std::ssize(mapKeys); valueNumber++) {
+            Verify(keyBuilder->Append(mapKeys[valueNumber]));
+            Verify(valueBuilder->Append(mapValues[valueNumber]));
+        }
+    }
+
+    auto arrowSchema = arrow::schema({arrow::field("map", mapBuilder->type())});
+
+    std::shared_ptr<arrow::Array> mapArray;
+    Verify(mapBuilder->Finish(&mapArray));
+    std::vector<std::shared_ptr<arrow::Array>> columns = {mapArray};
+
+    auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns);
+
+    return MakeOutputFromRecordBatch(recordBatch);
+}
+
+// Builds an Arrow stream with a single dictionary-encoded int32 column "integer"
+// whose rows are 1, 2, 1. With #addExtraValues the values 3, 4, 5 are additionally
+// inserted into the builder's memo table without appending any rows.
+std::string MakeDictionaryArrow(bool addExtraValues = false)
+{
+    arrow::DictionaryBuilder<arrow::Int32Type> dictionaryBuilder(arrow::default_memory_pool());
+
+    for (int32_t rowValue : {1, 2, 1}) {
+        Verify(dictionaryBuilder.Append(rowValue));
+    }
+
+    if (addExtraValues) {
+        arrow::Int32Builder extraBuilder;
+        for (int32_t extraValue : {3, 4, 5}) {
+            Verify(extraBuilder.Append(extraValue));
+        }
+        auto extraArray = extraBuilder.Finish().ValueOrDie();
+        Verify(dictionaryBuilder.InsertMemoValues(*extraArray));
+    }
+
+    auto schema = arrow::schema({arrow::field("integer", dictionaryBuilder.type())});
+
+    std::shared_ptr<arrow::Array> dictionaryColumn;
+    Verify(dictionaryBuilder.Finish(&dictionaryColumn));
+
+    std::vector<std::shared_ptr<arrow::Array>> columns = {dictionaryColumn};
+    return MakeOutputFromRecordBatch(arrow::RecordBatch::Make(schema, columns[0]->length(), columns));
+}
+
+// Builds an Arrow stream with a single column "struct" of type
+// struct<bar: string, foo: int64>. Row i is {stringData[i], intData[i]};
+// both inputs must have the same length.
+std::string MakeStructArrow(const std::vector<std::string>& stringData, const std::vector<int64_t>& intData)
+{
+    // #intData is indexed with positions taken from #stringData.
+    YT_VERIFY(stringData.size() == intData.size());
+
+    auto* pool = arrow::default_memory_pool();
+
+    auto stringBuilder = std::make_shared<arrow::StringBuilder>(pool);
+    auto intBuilder = std::make_shared<arrow::Int64Builder>(pool);
+
+    std::vector<std::shared_ptr<arrow::Field>> fields = {
+        std::make_shared<arrow::Field>("bar", std::make_shared<arrow::StringType>()),
+        std::make_shared<arrow::Field>("foo", std::make_shared<arrow::Int64Type>())
+    };
+
+    arrow::StructBuilder structBuilder(
+        std::make_shared<arrow::StructType>(fields),
+        pool,
+        {stringBuilder, intBuilder});
+
+    for (ssize_t index = 0; index < std::ssize(stringData); index++) {
+        Verify(structBuilder.Append());
+        Verify(stringBuilder->Append(stringData[index]));
+        Verify(intBuilder->Append(intData[index]));
+    }
+
+    std::shared_ptr<arrow::Schema> arrowSchema = arrow::schema({arrow::field("struct", structBuilder.type())});
+
+    std::shared_ptr<arrow::Array> structArray;
+    Verify(structBuilder.Finish(&structArray));
+    std::vector<std::shared_ptr<arrow::Array>> columns = {structArray};
+
+    auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns);
+
+    return MakeOutputFromRecordBatch(recordBatch);
+}
+
+// Builds an Arrow stream with one decimal column per entry of #columnParameters.
+// Each entry is (bitness, precision, scale) with bitness 128 or 256; every column
+// holds all of #values parsed from their decimal string form. Columns are named
+// "decimal<bitness>_<precision>_<scale>". #columnParameters must not be empty.
+std::string MakeDecimalArrows(const std::vector<TString>& values, const std::vector<std::tuple<int, int, int>>& columnParameters)
+{
+    // The batch row count is read from the first column below.
+    YT_VERIFY(!columnParameters.empty());
+
+    auto* pool = arrow::default_memory_pool();
+
+    auto makeColumn = [&]<class TBuilder, class TType, class TValue>(int precision, int scale) {
+        auto builder = std::make_shared<TBuilder>(std::make_shared<TType>(precision, scale), pool);
+        for (const auto& value : values) {
+            Verify(builder->Append(TValue(std::string(value))));
+        }
+        return builder->Finish().ValueOrDie();
+    };
+
+    // Column and field are produced in the same pass so the bitness dispatch cannot diverge.
+    std::vector<std::shared_ptr<arrow::Array>> columns;
+    arrow::FieldVector fields;
+    for (const auto& [bitness, precision, scale] : columnParameters) {
+        std::shared_ptr<arrow::DataType> type;
+        if (bitness == 128) {
+            type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+            columns.push_back(makeColumn.template operator()<arrow::Decimal128Builder, arrow::Decimal128Type, arrow::Decimal128>(precision, scale));
+        } else if (bitness == 256) {
+            type = std::make_shared<arrow::Decimal256Type>(precision, scale);
+            columns.push_back(makeColumn.template operator()<arrow::Decimal256Builder, arrow::Decimal256Type, arrow::Decimal256>(precision, scale));
+        } else {
+            YT_ABORT();
+        }
+        fields.push_back(std::make_shared<arrow::Field>(Format("decimal%v_%v_%v", bitness, precision, scale), type));
+    }
+
+    auto recordBatch = arrow::RecordBatch::Make(arrow::schema(std::move(fields)), columns[0]->length(), columns);
+
+    return MakeOutputFromRecordBatch(recordBatch);
+}
+
+// Builds an Arrow stream with a single column "list" that has exactly one row:
+// a list containing all of #values as Decimal128(10, 3).
+std::string MakeDecimalListArrow(const std::vector<TString>& values)
+{
+    auto* pool = arrow::default_memory_pool();
+    auto decimalBuilder = std::make_shared<arrow::Decimal128Builder>(std::make_shared<arrow::Decimal128Type>(10, 3), pool);
+    auto listBuilder = std::make_unique<arrow::ListBuilder>(pool, decimalBuilder);
+
+    // A single Append() opens the only list slot; every value goes into it.
+    Verify(listBuilder->Append());
+    for (const auto& value : values) {
+        Verify(decimalBuilder->Append(arrow::Decimal128(std::string(value))));
+    }
+    std::shared_ptr<arrow::Array> listArray;
+    Verify(listBuilder->Finish(&listArray));
+    auto arrowSchema = arrow::schema({arrow::field("list", listArray->type())});
+    std::vector<std::shared_ptr<arrow::Array>> columns = {listArray};
+    auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns);
+    return MakeOutputFromRecordBatch(recordBatch);
+}
+
+// Parses the stream produced by MakeDictionaryArrow(#addExtraValues) into an Int64
+// column "integer" and expects the rows 1, 2, 1.
+void TestArrowParserWithDictionary(bool addExtraValues = false)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("integer", EValueType::Int64)
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = MakeDictionaryArrow(addExtraValues);
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    const int expectedValues[] = {1, 2, 1};
+    for (int rowIndex = 0; rowIndex < 3; ++rowIndex) {
+        ASSERT_EQ(GetInt64(rows.GetRowValue(rowIndex, "integer")), expectedValues[rowIndex]);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// An int8 Arrow column is read into an Int64 table column.
+TEST(TArrowParserTest, Simple)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("integer", EValueType::Int64)
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = MakeIntegerArrow({1, 2, 3});
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    const int expectedValues[] = {1, 2, 3};
+    for (int rowIndex = 0; rowIndex < 3; ++rowIndex) {
+        ASSERT_EQ(GetInt64(rows.GetRowValue(rowIndex, "integer")), expectedValues[rowIndex]);
+    }
+}
+
+// Null entries of an Arrow column surface as null values of an optional column.
+TEST(TArrowParserTest, Optional)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("opt", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64)))
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = MakeOptionalIntegerArrow();
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    ASSERT_EQ(GetInt64(rows.GetRowValue(0, "opt")), 1);
+    for (int rowIndex = 1; rowIndex < 3; ++rowIndex) {
+        ASSERT_TRUE(IsNull(rows.GetRowValue(rowIndex, "opt")));
+    }
+}
+
+// Runs the dictionary scenario first without and then with extra memo values.
+TEST(TArrowParserTest, Dictionary)
+{
+    for (bool addExtraValues : {false, true}) {
+        TestArrowParserWithDictionary(addExtraValues);
+    }
+}
+
+// A boolean Arrow column is read into a Boolean table column.
+TEST(TArrowParserTest, Bool)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("bool", EValueType::Boolean),
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = MakeBooleanArrow({true, false, true});
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    const bool expectedValues[] = {true, false, true};
+    for (int rowIndex = 0; rowIndex < 3; ++rowIndex) {
+        ASSERT_EQ(GetBoolean(rows.GetRowValue(rowIndex, "bool")), expectedValues[rowIndex]);
+    }
+}
+
+// Two columns in one batch: an int8 column read into an Any column and a string column.
+TEST(TArrowParserTest, String)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("integer", EValueType::Any),
+        TColumnSchema("string", EValueType::String),
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = MakeIntAndStringArrow({1, 2, 3}, {"foo", "bar", "yt"});
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    const int expectedIntegers[] = {1, 2, 3};
+    const char* const expectedStrings[] = {"foo", "bar", "yt"};
+    for (int rowIndex = 0; rowIndex < 3; ++rowIndex) {
+        ASSERT_EQ(GetInt64(rows.GetRowValue(rowIndex, "integer")), expectedIntegers[rowIndex]);
+        ASSERT_EQ(GetString(rows.GetRowValue(rowIndex, "string")), expectedStrings[rowIndex]);
+    }
+}
+
+
+// Renders #node as text YSON by walking the tree with a TYsonWriter.
+// NOTE(review): the `true` passed to VisitTree presumably requests a stable traversal
+// order (hence the function name) -- confirm against the VisitTree declaration.
+TString ConvertToYsonTextStringStable(const INodePtr& node)
+{
+    TStringStream textStream;
+    TYsonWriter ysonWriter(&textStream, EYsonFormat::Text);
+    VisitTree(node, &ysonWriter, true, TAttributeFilter());
+    ysonWriter.Flush();
+    return textStream.Str();
+}
+
+// Lists of int32 (with one null list in the middle) are read into a list<int64> column.
+TEST(TArrowParserTest, ListOfIntegers)
+{
+    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("list", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+
+    auto parser = CreateParserForArrow(&collectedRows);
+
+    auto data = MakeIntListArrow({std::vector{1, 2, 3}, std::nullopt, std::vector{4, 5}});
+    parser->Read(data);
+    parser->Finish();
+
+    // Pin the row count so that missing or extra rows are detected before indexing.
+    ASSERT_EQ(collectedRows.Size(), 3u);
+
+    auto firstNode = GetComposite(collectedRows.GetRowValue(0, "list"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(firstNode), "[1;2;3;]");
+
+    ASSERT_EQ(EValueType::Null, collectedRows.GetRowValue(1, "list").Type);
+
+    auto thirdNode = GetComposite(collectedRows.GetRowValue(2, "list"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(thirdNode), "[4;5;]");
+}
+
+// Lists of strings are read into a list<string> column.
+TEST(TArrowParserTest, ListOfStrings)
+{
+    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("list", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+
+    auto parser = CreateParserForArrow(&collectedRows);
+
+    auto data = MakeStringListArrow({{"foo", "bar"}, {"42", "universe"}});
+    parser->Read(data);
+    parser->Finish();
+
+    // Pin the row count so that missing or extra rows are detected before indexing.
+    ASSERT_EQ(collectedRows.Size(), 2u);
+
+    auto firstNode = GetComposite(collectedRows.GetRowValue(0, "list"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(firstNode), "[\"foo\";\"bar\";]");
+
+    auto secondNode = GetComposite(collectedRows.GetRowValue(1, "list"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(secondNode), "[\"42\";\"universe\";]");
+}
+
+// An Arrow map column is read into a dict<int64, uint64> column; each row is
+// rendered as a list of [key; value] pairs.
+TEST(TArrowParserTest, Map)
+{
+    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema(
+            "map",
+            DictLogicalType(
+                SimpleLogicalType(ESimpleLogicalValueType::Int64),
+                SimpleLogicalType(ESimpleLogicalValueType::Uint64))),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+
+    auto parser = CreateParserForArrow(&collectedRows);
+
+    auto data = MakeMapArrow({{1, 3}, {3}}, {{2, 2}, {2}});
+    parser->Read(data);
+    parser->Finish();
+
+    // Pin the row count so that missing or extra rows are detected before indexing.
+    ASSERT_EQ(collectedRows.Size(), 2u);
+
+    auto firstNode = GetComposite(collectedRows.GetRowValue(0, "map"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(firstNode), "[[1;2;];[3;2;];]");
+
+    auto secondNode = GetComposite(collectedRows.GetRowValue(1, "map"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(secondNode), "[[3;2;];]");
+}
+
+// Two Arrow streams joined by GetEos() are parsed as one sequence of five rows.
+TEST(TArrowParserTest, SeveralIntArrays)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("integer", EValueType::Int64),
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+    auto payload = Format("%v%v%v", MakeIntegerArrow({1, 2, 3}), GetEos(), MakeIntegerArrow({5, 6}));
+    arrowParser->Read(payload);
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 5u);
+
+    const int expectedValues[] = {1, 2, 3, 5, 6};
+    for (int rowIndex = 0; rowIndex < 5; ++rowIndex) {
+        ASSERT_EQ(GetInt64(rows.GetRowValue(rowIndex, "integer")), expectedValues[rowIndex]);
+    }
+}
+
+// An Arrow struct column is read into a struct<bar: string, foo: int64> column;
+// each row is rendered as a positional list of its fields.
+TEST(TArrowParserTest, Struct)
+{
+    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("struct", StructLogicalType({
+            {"bar", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            {"foo", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+        })),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+
+    auto parser = CreateParserForArrow(&collectedRows);
+
+    parser->Read(MakeStructArrow({"one", "two"}, {1, 2}));
+    parser->Finish();
+
+    // Pin the row count so that missing or extra rows are detected before indexing.
+    ASSERT_EQ(collectedRows.Size(), 2u);
+
+    auto firstNode = GetComposite(collectedRows.GetRowValue(0, "struct"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(firstNode), "[\"one\";1;]");
+
+    auto secondNode = GetComposite(collectedRows.GetRowValue(1, "struct"));
+    ASSERT_EQ(ConvertToYsonTextStringStable(secondNode), "[\"two\";2;]");
+}
+
+// Decimal128 and Decimal256 Arrow columns of several precisions are parsed into decimal
+// table columns. The expected binary strings depend only on the target precision:
+// 8 bytes for precision 10, 16 bytes for 35 and 38, 32 bytes for 76.
+TEST(TArrowParserTest, DecimalVariousPrecisions)
+{
+ auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+ TColumnSchema("decimal128_10_3", DecimalLogicalType(10, 3)),
+ TColumnSchema("decimal128_35_3", DecimalLogicalType(35, 3)),
+ TColumnSchema("decimal128_38_3", DecimalLogicalType(38, 3)),
+ TColumnSchema("decimal256_10_3", DecimalLogicalType(10, 3)),
+ TColumnSchema("decimal256_35_3", DecimalLogicalType(35, 3)),
+ TColumnSchema("decimal256_38_3", DecimalLogicalType(38, 3)),
+ TColumnSchema("decimal256_76_3", DecimalLogicalType(76, 3)),
+ });
+
+ TCollectingValueConsumer collectedRows(tableSchema);
+
+ std::vector<TString> values = {"3.141", "0.000", "-2.718", "9999999.999"};
+
+ auto parser = CreateParserForArrow(&collectedRows);
+
+ parser->Read(MakeDecimalArrows(values, {{128, 10, 3}, {128, 35, 3}, {128, 38, 3}, {256, 10, 3}, {256, 35, 3}, {256, 38, 3}, {256, 76, 3}}));
+ parser->Finish();
+
+ // Gathers the raw string payload of #columnName for every input value, in row order.
+ auto collectStrings = [&] (TStringBuf columnName) {
+ std::vector<TString> result;
+ for (size_t index = 0; index < values.size(); ++index) {
+ result.push_back(collectedRows.GetRowValue(index, columnName).AsString());
+ }
+ return result;
+ };
+
+ // Expected encodings of 3.141, 0.000, -2.718 and 9999999.999, in that order.
+ std::vector<TString> expectedValues_10_3 =
+ {"\x80\x00\x00\x00\x00\x00\x0c\x45"s, "\x80\x00\x00\x00\x00\x00\x00\x00"s, "\x7f\xff\xff\xff\xff\xff\xf5\x62"s, "\x80\x00\x00\x02\x54\x0b\xe3\xff"s};
+ std::vector<TString> expectedValues_35_3 =
+ {
+ "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x45"s, "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"s,
+ "\x7f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf5\x62"s, "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x54\x0b\xe3\xff"s,
+ };
+ std::vector<TString> expectedValues_38_3 =
+ {
+ "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x45"s, "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"s,
+ "\x7f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf5\x62"s, "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x54\x0b\xe3\xff"s
+ };
+ std::vector<TString> expectedValues_76_3 =
+ {
+ "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x45"s,
+ "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"s,
+ "\x7f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf5\x62"s,
+ "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x54\x0b\xe3\xff"s,
+ };
+ ASSERT_EQ(expectedValues_10_3, collectStrings("decimal128_10_3"));
+ ASSERT_EQ(expectedValues_35_3, collectStrings("decimal128_35_3"));
+ ASSERT_EQ(expectedValues_38_3, collectStrings("decimal128_38_3"));
+ ASSERT_EQ(expectedValues_10_3, collectStrings("decimal256_10_3"));
+ ASSERT_EQ(expectedValues_35_3, collectStrings("decimal256_35_3"));
+ ASSERT_EQ(expectedValues_38_3, collectStrings("decimal256_38_3"));
+ ASSERT_EQ(expectedValues_76_3, collectStrings("decimal256_76_3"));
+}
+
+// A single list of Decimal128(10, 3) values is read into a list<decimal(10, 3)> column.
+TEST(TArrowParserTest, ListOfDecimals)
+{
+    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("list", ListLogicalType(DecimalLogicalType(10, 3))),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+
+    std::vector<TString> values = {"3.141", "0.000", "-2.718", "9999999.999"};
+
+    auto parser = CreateParserForArrow(&collectedRows);
+
+    parser->Read(MakeDecimalListArrow(values));
+    parser->Finish();
+
+    // MakeDecimalListArrow produces exactly one row holding the whole list.
+    ASSERT_EQ(collectedRows.Size(), 1u);
+
+    auto firstList = ConvertTo<std::vector<TString>>(GetComposite(collectedRows.GetRowValue(0, "list")));
+    std::vector<TString> secondList = {
+        "\x80\x00\x00\x00\x00\x00\x0c\x45"s, "\x80\x00\x00\x00\x00\x00\x00\x00"s,
+        "\x7f\xff\xff\xff\xff\xff\xf5\x62"s, "\x80\x00\x00\x02\x54\x0b\xe3\xff"s
+    };
+    ASSERT_EQ(firstList, secondList);
+}
+
+// The parser is fed one byte per Read() call and must still produce all rows.
+TEST(TArrowParserTest, BlockingInput)
+{
+    auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+        TColumnSchema("integer", EValueType::Int64)
+    });
+    TCollectingValueConsumer rows(schema);
+
+    auto arrowParser = CreateParserForArrow(&rows);
+
+    auto payload = MakeIntegerArrow({1, 2, 3});
+    for (char byte : payload) {
+        std::string chunk(1, byte);
+        arrowParser->Read(chunk);
+    }
+    arrowParser->Finish();
+
+    ASSERT_EQ(rows.Size(), 3u);
+
+    const int expectedValues[] = {1, 2, 3};
+    for (int rowIndex = 0; rowIndex < 3; ++rowIndex) {
+        ASSERT_EQ(GetInt64(rows.GetRowValue(rowIndex, "integer")), expectedValues[rowIndex]);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/yt/yt/library/formats/unittests/dsv_parser_ut.cpp b/yt/yt/library/formats/unittests/dsv_parser_ut.cpp
new file mode 100644
index 0000000000..f939585099
--- /dev/null
+++ b/yt/yt/library/formats/unittests/dsv_parser_ut.cpp
@@ -0,0 +1,365 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/test_framework/yson_consumer_mock.h>
+
+#include <yt/yt/library/formats/dsv_parser.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+
+using ::testing::InSequence;
+using ::testing::StrictMock;
+using ::testing::NiceMock;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Two newline-terminated records with tab-separated key=value pairs are reported as two
+// maps; every value (including "42" and "10") is delivered as a string scalar.
+TEST(TDsvParserTest, Simple)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("integer"));
+ EXPECT_CALL(Mock, OnStringScalar("42"));
+ EXPECT_CALL(Mock, OnKeyedItem("string"));
+ EXPECT_CALL(Mock, OnStringScalar("some"));
+ EXPECT_CALL(Mock, OnKeyedItem("double"));
+ EXPECT_CALL(Mock, OnStringScalar("10"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("foo"));
+ EXPECT_CALL(Mock, OnStringScalar("bar"));
+ EXPECT_CALL(Mock, OnKeyedItem("one"));
+ EXPECT_CALL(Mock, OnStringScalar("1"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "integer=42\tstring=some\tdouble=10\n"
+ "foo=bar\tone=1\n";
+ ParseDsv(input, &Mock);
+}
+
+// Empty input must not trigger any consumer call (no expectations are set on the strict mock).
+TEST(TDsvParserTest, EmptyInput)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ TString input = "";
+ ParseDsv(input, &Mock);
+}
+
+// Escaped "\0" sequences in the input decode to NUL bytes, while the raw bytes
+// \x80, \x16 and \xC8 are passed through unchanged.
+TEST(TDsvParserTest, BinaryData)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+
+ // Expected values are built with an explicit length because they contain NUL bytes.
+ auto a = TString("\0\0\0\0", 4);
+ auto b = TString("\x80\0\x16\xC8", 4);
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("ntr"));
+ EXPECT_CALL(Mock, OnStringScalar(a));
+ EXPECT_CALL(Mock, OnKeyedItem("xrp"));
+ EXPECT_CALL(Mock, OnStringScalar(b));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "ntr=\\0\\0\\0\\0\txrp=\x80\\0\x16\xC8\n";
+ ParseDsv(input, &Mock);
+}
+
+// A lone newline is reported as one empty map.
+TEST(TDsvParserTest, EmptyRecord)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "\n";
+ ParseDsv(input, &Mock);
+}
+
+// Two consecutive newlines are reported as two empty maps.
+TEST(TDsvParserTest, EmptyRecords)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "\n\n";
+ ParseDsv(input, &Mock);
+}
+
+// A bare "=" yields a pair with an empty key and an empty value.
+TEST(TDsvParserTest, EmptyKeysAndValues)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem(""));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "=\n";
+ ParseDsv(input, &Mock);
+}
+
+// A raw (unescaped) NUL byte inside the input makes ParseDsv throw.
+TEST(TDsvParserTest, UnescapedZeroInInput)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+
+ // Explicit length: the literal contains an embedded NUL.
+ TString input = TString("a\0b=v", 5);
+ EXPECT_ANY_THROW({
+ ParseDsv(input, &Mock);
+ });
+}
+
+// Escaped "\0" inside a key or a value decodes to a NUL byte and does not cut the token short.
+TEST(TDsvParserTest, ZerosAreNotTerminals)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ // Explicit lengths: the expected key and value contain embedded NULs.
+ TString key = TString("a\0b", 3);
+ TString value = TString("c\0d", 3);
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem(key));
+ EXPECT_CALL(Mock, OnStringScalar(value));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "a\\0b=c\\0d\n";
+ ParseDsv(input, &Mock);
+}
+
+// A record without the trailing newline makes ParseDsv throw. A NiceMock is used,
+// so whatever consumer calls happen before the throw are not checked.
+TEST(TDsvParserTest, UnterminatedRecord)
+{
+ NiceMock<TMockYsonConsumer> Mock;
+
+ TString input = "a=b";
+ EXPECT_ANY_THROW({
+ ParseDsv(input, &Mock);
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Fixture for TSKV tests: a DSV config with the line prefix "tskv", a strict mock for
+// tests that verify the exact call sequence and a nice mock for tests that only expect a throw.
+class TTskvParserTest: public ::testing::Test
+{
+public:
+ StrictMock<TMockYsonConsumer> Mock;
+ NiceMock<TMockYsonConsumer> ErrorMock;
+
+ TDsvFormatConfigPtr Config;
+
+ // Creates a fresh config for every test; individual tests may tweak it further.
+ void SetUp() override {
+ Config = New<TDsvFormatConfig>();
+ Config->LinePrefix = "tskv";
+ }
+};
+
+// Three prefixed lines: a prefix-only line gives an empty map, and a trailing tab
+// before the newline (second line) adds no extra field.
+TEST_F(TTskvParserTest, Simple)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("id"));
+ EXPECT_CALL(Mock, OnStringScalar("1"));
+ EXPECT_CALL(Mock, OnKeyedItem("guid"));
+ EXPECT_CALL(Mock, OnStringScalar("100500"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("id"));
+ EXPECT_CALL(Mock, OnStringScalar("2"));
+ EXPECT_CALL(Mock, OnKeyedItem("guid"));
+ EXPECT_CALL(Mock, OnStringScalar("20025"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "tskv\n"
+ "tskv\tid=1\tguid=100500\t\n"
+ "tskv\tid=2\tguid=20025\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+// A single prefixed, newline-terminated record with one key=value pair.
+TEST_F(TTskvParserTest, SimpleWithNewLine)
+{
+ InSequence dummy;
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("foo"));
+ EXPECT_CALL(Mock, OnStringScalar("bar"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "tskv\tfoo=bar\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+// Escape handling with the default config: backslash escapes are accepted in the line
+// prefix, keys and values; "\=" keeps '=' inside a key; "\t", "\n", "\r" and "\\" decode
+// to the corresponding characters; a backslash before an ordinary letter is dropped.
+TEST_F(TTskvParserTest, Escaping)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("a=b"));
+ EXPECT_CALL(Mock, OnStringScalar("c=d or e=f"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key_with_\t,\r_and_\n"));
+ EXPECT_CALL(Mock, OnStringScalar("value_with_\t,\\_and_\r\n"));
+ EXPECT_CALL(Mock, OnKeyedItem("another_key"));
+ EXPECT_CALL(Mock, OnStringScalar("another_value"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "t\\s\\kv\n"
+ "tskv" "\t" "a\\=b" "=" "c\\=d or e=f" "\n" // Note: unescaping is less strict
+ "tskv" "\t"
+ "key_with_\\t,\r_and_\\n"
+ "="
+ "value_with_\\t,\\\\_and_\\r\\n"
+ "\t"
+ "an\\other_\\key=anoth\\er_v\\alue"
+ "\n";
+
+ ParseDsv(input, &Mock, Config);
+}
+
+// With EnableEscaping = false backslashes are kept verbatim: the key ends at the first
+// '=' (giving "a\") and the remainder of the field, backslashes included, is the value.
+TEST_F(TTskvParserTest, DisabledEscaping)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("a\\"));
+ EXPECT_CALL(Mock, OnStringScalar("b\\t=c\\=d or e=f\\0"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "tskv\t\\x\\y\n"
+ "tskv" "\t" "a\\=b\\t" "=" "c\\=d or e=f\\0" "\n";
+
+ Config->EnableEscaping = false;
+
+ ParseDsv(input, &Mock, Config);
+}
+
+// An unescaped '=' is accepted both in the line prefix and inside a value.
+TEST_F(TTskvParserTest, AllowedUnescapedSymbols)
+{
+ Config->LinePrefix = "prefix_with_=";
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("just_key"));
+ EXPECT_CALL(Mock, OnStringScalar("value_with_="));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "prefix_with_=" "\t" "just_key" "=" "value_with_=" "\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+// Fields without '=' (including a repeated "tskv" token) produce no keyed items,
+// and runs of consecutive tabs are tolerated.
+TEST_F(TTskvParserTest, UndefinedValues)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("a"));
+ EXPECT_CALL(Mock, OnStringScalar("b"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "tskv" "\t" "tskv" "\t" "tskv" "\n"
+ "tskv\t" "some_key" "\t\t\t" "a=b" "\t" "another_key" "\n" // Note: consecutive \t
+ "tskv\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+
+// A line that consists of the prefix alone is reported as an empty map.
+TEST_F(TTskvParserTest, OnlyLinePrefix)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "tskv\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+// The prefix followed by a single tab is also reported as an empty map.
+TEST_F(TTskvParserTest, OnlyLinePrefixAndTab)
+{
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = "tskv\t\n";
+ ParseDsv(input, &Mock, Config);
+}
+
+// Input that ends in the middle of the line prefix makes ParseDsv throw.
+TEST_F(TTskvParserTest, NotFinishedLinePrefix)
+{
+ TString input = "tsk";
+
+ EXPECT_ANY_THROW({
+ ParseDsv(input, &ErrorMock, Config);
+ });
+}
+
+// A line whose prefix differs from the configured one ("tZkv" instead of "tskv")
+// makes ParseDsv throw even though the surrounding lines are valid.
+TEST_F(TTskvParserTest, WrongLinePrefix)
+{
+ TString input =
+ "tskv\ta=b\n"
+ "tZkv\tc=d\te=f\n"
+ "tskv\ta=b\n";
+
+ EXPECT_ANY_THROW({
+ ParseDsv(input, &ErrorMock, Config);
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/dsv_writer_ut.cpp b/yt/yt/library/formats/unittests/dsv_writer_ut.cpp
new file mode 100644
index 0000000000..842a669557
--- /dev/null
+++ b/yt/yt/library/formats/unittests/dsv_writer_ut.cpp
@@ -0,0 +1,316 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/library/formats/dsv_parser.h>
+#include <yt/yt/library/formats/dsv_writer.h>
+
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYTree;
+using namespace NYson;
+using namespace NConcurrency;
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TDsvWriterTest, StringScalar)
+{
+    // A lone string scalar is expected to be written as is, with nothing appended.
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnStringScalar("0-2-xb-1234");
+
+    EXPECT_EQ("0-2-xb-1234", sink.Str());
+}
+
+TEST(TDsvWriterTest, ListContainingDifferentTypes)
+{
+    // Every list item is expected on its own line; an item without a value
+    // produces an empty line and a map item produces tab-separated key=value pairs.
+    TString expected =
+        "100\n"
+        "foo\n"
+        "\n"
+        "a=10\tb=c\n";
+
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+
+    dsvConsumer.OnBeginList();
+
+    dsvConsumer.OnListItem();
+    dsvConsumer.OnInt64Scalar(100);
+
+    dsvConsumer.OnListItem();
+    dsvConsumer.OnStringScalar("foo");
+
+    // Item that is started but never receives a value.
+    dsvConsumer.OnListItem();
+
+    dsvConsumer.OnListItem();
+    dsvConsumer.OnBeginMap();
+    dsvConsumer.OnKeyedItem("a");
+    dsvConsumer.OnStringScalar("10");
+    dsvConsumer.OnKeyedItem("b");
+    dsvConsumer.OnStringScalar("c");
+    dsvConsumer.OnEndMap();
+
+    dsvConsumer.OnEndList();
+
+    EXPECT_EQ(expected, sink.Str());
+}
+
+TEST(TDsvWriterTest, ListInsideList)
+{
+    // Opening a list as an item of another list must be rejected.
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnBeginList();
+    dsvConsumer.OnListItem();
+
+    EXPECT_ANY_THROW(dsvConsumer.OnBeginList());
+}
+
+TEST(TDsvWriterTest, ListInsideMap)
+{
+    // Opening a list as a map value must be rejected.
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnBeginMap();
+    dsvConsumer.OnKeyedItem("foo");
+
+    EXPECT_ANY_THROW(dsvConsumer.OnBeginList());
+}
+
+TEST(TDsvWriterTest, MapInsideMap)
+{
+    // Opening a map as a map value must be rejected.
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnBeginMap();
+    dsvConsumer.OnKeyedItem("foo");
+
+    EXPECT_ANY_THROW(dsvConsumer.OnBeginMap());
+}
+
+TEST(TDsvWriterTest, WithoutEsacping)
+{
+    // With escaping disabled, tab, backslash, '=' and newline must pass through untouched.
+    auto formatConfig = New<TDsvFormatConfig>();
+    formatConfig->EnableEscaping = false;
+
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink, formatConfig);
+    dsvConsumer.OnStringScalar("string_with_\t_\\_=_and_\n");
+
+    TString expected = "string_with_\t_\\_=_and_\n";
+    EXPECT_EQ(expected, sink.Str());
+}
+
+TEST(TDsvWriterTest, ListUsingOnRaw)
+{
+    // A raw YSON list fed through OnRaw is expected to be written one item per line.
+    TString expected =
+        "10\n"
+        "20\n"
+        "30\n";
+
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnRaw("[10; 20; 30]", EYsonType::Node);
+
+    EXPECT_EQ(expected, sink.Str());
+}
+
+TEST(TDsvWriterTest, MapUsingOnRaw)
+{
+    // A raw YSON map fed through OnRaw is expected as tab-separated key=value
+    // pairs; note that no trailing newline is expected for a top-level map.
+    TString expected = "a=b\tc=d";
+
+    TStringStream sink;
+    TDsvNodeConsumer dsvConsumer(&sink);
+    dsvConsumer.OnRaw("{a=b; c=d}", EYsonType::Node);
+
+    EXPECT_EQ(expected, sink.Str());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TDsvWriterTest, SimpleTabular)
+{
+    // Writes two rows through the schemaless DSV writer with table index enabled.
+    // Per the expected output below, the table index is emitted as an
+    // "@table_index" field while null values and the row/range index columns
+    // do not appear at all.
+    auto nameTable = New<TNameTable>();
+    auto integerId = nameTable->RegisterName("integer");
+    auto stringId = nameTable->RegisterName("string");
+    auto doubleId = nameTable->RegisterName("double");
+    auto fooId = nameTable->RegisterName("foo");
+    auto oneId = nameTable->RegisterName("one");
+    auto tableIndexId = nameTable->RegisterName(TableIndexColumnName);
+    auto rowIndexId = nameTable->RegisterName(RowIndexColumnName);
+    auto rangeIndexId = nameTable->RegisterName(RangeIndexColumnName);
+
+    TUnversionedRowBuilder firstRow;
+    firstRow.AddValue(MakeUnversionedInt64Value(42, integerId));
+    firstRow.AddValue(MakeUnversionedStringValue("some", stringId));
+    firstRow.AddValue(MakeUnversionedDoubleValue(10., doubleId));
+    firstRow.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
+    firstRow.AddValue(MakeUnversionedInt64Value(42, rowIndexId));
+    firstRow.AddValue(MakeUnversionedInt64Value(1, rangeIndexId));
+
+    TUnversionedRowBuilder secondRow;
+    secondRow.AddValue(MakeUnversionedStringValue("bar", fooId));
+    secondRow.AddValue(MakeUnversionedSentinelValue(EValueType::Null, integerId));
+    secondRow.AddValue(MakeUnversionedInt64Value(1, oneId));
+    secondRow.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
+    secondRow.AddValue(MakeUnversionedInt64Value(43, rowIndexId));
+
+    std::vector<TUnversionedRow> rows = {firstRow.GetRow(), secondRow.GetRow()};
+
+    auto formatConfig = New<TDsvFormatConfig>();
+    formatConfig->EnableTableIndex = true;
+
+    auto controlAttributes = New<TControlAttributesConfig>();
+    controlAttributes->EnableTableIndex = true;
+
+    TStringStream sink;
+    auto writer = CreateSchemalessWriterForDsv(
+        formatConfig,
+        nameTable,
+        CreateAsyncAdapter(static_cast<IOutputStream*>(&sink)),
+        false,
+        controlAttributes,
+        0);
+
+    EXPECT_TRUE(writer->Write(rows));
+    writer->Close().Get().ThrowOnError();
+
+    TString expected =
+        "integer=42\tstring=some\tdouble=10.\t@table_index=2\n"
+        "foo=bar\tone=1\t@table_index=2\n";
+    EXPECT_EQ(expected, sink.Str());
+}
+
+TEST(TDsvWriterTest, AnyTabular)
+{
+    // A row holding an `any` value makes Write() return false and the error
+    // surfaces through the writer's ready event.
+    auto nameTable = New<TNameTable>();
+    auto anyColumnId = nameTable->RegisterName("any");
+
+    TUnversionedRowBuilder rowBuilder;
+    rowBuilder.AddValue(MakeUnversionedAnyValue("[]", anyColumnId));
+    std::vector<TUnversionedRow> rows = {rowBuilder.GetRow()};
+
+    TStringStream sink;
+    auto controlAttributes = New<TControlAttributesConfig>();
+    auto writer = CreateSchemalessWriterForDsv(
+        New<TDsvFormatConfig>(),
+        nameTable,
+        CreateAsyncAdapter(static_cast<IOutputStream*>(&sink)),
+        false,
+        controlAttributes,
+        0);
+
+    EXPECT_FALSE(writer->Write(rows));
+    EXPECT_ANY_THROW(writer->GetReadyEvent().Get().ThrowOnError());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TTskvWriterTest, SimpleTabular)
+{
+    // With LinePrefix = "tskv" every row is expected to start with the prefix;
+    // a row holding only table/row/range index columns collapses to the bare prefix.
+    auto nameTable = New<TNameTable>();
+    auto idColumn = nameTable->RegisterName("id");
+    auto guidColumn = nameTable->RegisterName("guid");
+    auto tableIndexId = nameTable->RegisterName(TableIndexColumnName);
+    auto rowIndexId = nameTable->RegisterName(RowIndexColumnName);
+    auto rangeIndexId = nameTable->RegisterName(RangeIndexColumnName);
+
+    TUnversionedRowBuilder controlOnlyRow;
+    controlOnlyRow.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
+    controlOnlyRow.AddValue(MakeUnversionedInt64Value(42, rowIndexId));
+    controlOnlyRow.AddValue(MakeUnversionedInt64Value(1, rangeIndexId));
+
+    TUnversionedRowBuilder firstDataRow;
+    firstDataRow.AddValue(MakeUnversionedStringValue("1", idColumn));
+    firstDataRow.AddValue(MakeUnversionedInt64Value(100500, guidColumn));
+
+    TUnversionedRowBuilder secondDataRow;
+    secondDataRow.AddValue(MakeUnversionedStringValue("2", idColumn));
+    secondDataRow.AddValue(MakeUnversionedInt64Value(20025, guidColumn));
+
+    std::vector<TUnversionedRow> rows = {
+        controlOnlyRow.GetRow(),
+        firstDataRow.GetRow(),
+        secondDataRow.GetRow(),
+    };
+
+    auto formatConfig = New<TDsvFormatConfig>();
+    formatConfig->LinePrefix = "tskv";
+
+    TStringStream sink;
+    auto controlAttributes = New<TControlAttributesConfig>();
+    auto writer = CreateSchemalessWriterForDsv(
+        formatConfig,
+        nameTable,
+        CreateAsyncAdapter(static_cast<IOutputStream*>(&sink)),
+        false,
+        controlAttributes,
+        0);
+
+    EXPECT_TRUE(writer->Write(rows));
+    writer->Close().Get().ThrowOnError();
+
+    TString expected =
+        "tskv\n"
+        "tskv\tid=1\tguid=100500\n"
+        "tskv\tid=2\tguid=20025\n";
+    EXPECT_EQ(expected, sink.Str());
+}
+
+TEST(TTskvWriterTest, Escaping)
+{
+    // Checks escaping of special characters in both keys and values.
+    // The explicit length keeps the leading NUL byte inside the string.
+    auto nulKey = TString("\0 is escaped", 12);
+
+    auto nameTable = New<TNameTable>();
+    auto nulKeyId = nameTable->RegisterName(nulKey);
+    auto specialKeyId = nameTable->RegisterName("Escaping in in key: \r \t \n \\ =");
+
+    TUnversionedRowBuilder rowBuilder;
+    rowBuilder.AddValue(MakeUnversionedStringValue(nulKey, nulKeyId));
+    rowBuilder.AddValue(MakeUnversionedStringValue("Escaping in value: \r \t \n \\ =", specialKeyId));
+    std::vector<TUnversionedRow> rows = {rowBuilder.GetRow()};
+
+    auto formatConfig = New<TDsvFormatConfig>();
+    formatConfig->LinePrefix = "tskv";
+
+    TStringStream sink;
+    auto controlAttributes = New<TControlAttributesConfig>();
+    auto writer = CreateSchemalessWriterForDsv(
+        formatConfig,
+        nameTable,
+        CreateAsyncAdapter(static_cast<IOutputStream*>(&sink)),
+        false,
+        controlAttributes,
+        0);
+
+    EXPECT_TRUE(writer->Write(rows));
+    writer->Close().Get().ThrowOnError();
+
+    TString expected =
+        // Line prefix and field separator.
+        "tskv\t"
+        // First field: the NUL byte is escaped in the key and in the value.
+        "\\0 is escaped=\\0 is escaped\t"
+        // Second field: key, separator, value.
+        "Escaping in in key: \\r \\t \\n \\\\ \\="
+        "="
+        "Escaping in value: \\r \\t \\n \\\\ =" // Note: = is not escaped in values
+        "\n";
+    EXPECT_EQ(expected, sink.Str());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/format_writer_ut.h b/yt/yt/library/formats/unittests/format_writer_ut.h
new file mode 100644
index 0000000000..9da194d588
--- /dev/null
+++ b/yt/yt/library/formats/unittests/format_writer_ut.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <yt/yt/library/formats/format.h>
+
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+namespace NYT::NFormats {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Writes five rows through |writer|, doubling the number of columns registered
+// in |nameTable| on every row (1, 2, 4, 8, 16), then closes the writer.
+// Expects every Write() call to return true; rethrows any error from Close().
+void TestNameTableExpansion(ISchemalessFormatWriterPtr writer, NTableClient::TNameTablePtr nameTable)
+{
+    constexpr int RowCount = 5;
+    for (int rowIndex = 0; rowIndex < RowCount; ++rowIndex) {
+        const int columnCount = 1 << rowIndex;
+
+        NTableClient::TUnversionedOwningRowBuilder builder;
+        for (int column = 0; column < columnCount; ++column) {
+            auto name = "Column" + ToString(column);
+            auto payload = "Value" + ToString(column);
+            int id = nameTable->GetIdOrRegisterName(name);
+            builder.AddValue(NTableClient::MakeUnversionedStringValue(payload, id));
+        }
+
+        auto owningRow = builder.FinishRow();
+        EXPECT_TRUE(writer->Write({owningRow.Get()}));
+    }
+
+    writer->Close().Get().ThrowOnError();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/protobuf_format_ut.cpp b/yt/yt/library/formats/unittests/protobuf_format_ut.cpp
new file mode 100644
index 0000000000..f3cb743833
--- /dev/null
+++ b/yt/yt/library/formats/unittests/protobuf_format_ut.cpp
@@ -0,0 +1,4659 @@
+#include "row_helpers.h"
+#include "yson_helpers.h"
+#include "yt/yt/client/table_client/public.h"
+
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+#include <yt/yt/core/json/json_parser.h>
+#include <yt/yt/core/yson/string.h>
+#include <yt/yt/core/ytree/fluent.h>
+
+#include <yt/yt/client/formats/config.h>
+#include <yt/yt/client/formats/parser.h>
+#include <yt/yt/client/formats/format.h>
+#include <yt/yt/client/table_client/logical_type.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/value_consumer.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+#include <yt/yt/library/formats/format.h>
+#include <yt/yt/library/formats/lenval_control_constants.h>
+#include <yt/yt/library/formats/protobuf_writer.h>
+#include <yt/yt/library/formats/protobuf_parser.h>
+#include <yt/yt/library/formats/protobuf.h>
+
+#include <yt/yt/library/formats/unittests/protobuf_format_ut.pb.h>
+
+#include <yt/yt/library/named_value/named_value.h>
+
+#include <util/random/fast.h>
+
+#include <google/protobuf/text_format.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/descriptor.pb.h>
+
+using namespace std::string_view_literals;
+
+
+namespace NYT {
+namespace {
+
+using namespace NYson;
+using namespace NYTree;
+using namespace NFormats;
+using namespace NTableClient;
+using namespace NConcurrency;
+using namespace NProtobufFormatTest;
+
+using ::google::protobuf::FileDescriptor;
+using NNamedValue::MakeRow;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Selects which flavor of protobuf format config the helpers in this file build:
+// the hardcoded legacy serialized file descriptor, a file descriptor set
+// generated from compiled-in messages, or an explicit structured description.
+DEFINE_ENUM(EProtoFormatType,
+ (FileDescriptorLegacy)
+ (FileDescriptor)
+ (Structured)
+);
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Expects that two nodes compare equal via AreNodesEqual; on failure both
+// nodes are printed as text YSON.
+// NOTE(review): the expansion already ends with ';', so nothing can be
+// streamed after the macro and it is not safe in an unbraced if/else.
+#define EXPECT_NODES_EQUAL(a, b) \
+ EXPECT_TRUE(AreNodesEqual((a), (b))) \
+ << #a ": " << ConvertToYsonString((a), EYsonFormat::Text).ToString() \
+ << "\n\n" #b ": " << ConvertToYsonString((b), EYsonFormat::Text).ToString();
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Serializes |node| into its text YSON representation.
+TString ConvertToTextYson(const INodePtr& node)
+{
+    auto ysonString = ConvertToYsonString(node, EYsonFormat::Text);
+    return ysonString.ToString();
+}
+
+// Hardcoded serialization of file descriptor used in old format description.
+TString FileDescriptorLegacy = "\x0a\xb6\x03\x0a\x29\x6a\x75\x6e\x6b\x2f\x65\x72\x6d\x6f\x6c\x6f\x76\x64\x2f\x74\x65\x73\x74\x2d\x70\x72\x6f\x74\x6f\x62"
+ "\x75\x66\x2f\x6d\x65\x73\x73\x61\x67\x65\x2e\x70\x72\x6f\x74\x6f\x22\x2d\x0a\x0f\x54\x45\x6d\x62\x65\x64\x65\x64\x4d\x65\x73\x73\x61\x67\x65\x12"
+ "\x0b\x0a\x03\x4b\x65\x79\x18\x01\x20\x01\x28\x09\x12\x0d\x0a\x05\x56\x61\x6c\x75\x65\x18\x02\x20\x01\x28\x09\x22\xb3\x02\x0a\x08\x54\x4d\x65\x73"
+ "\x73\x61\x67\x65\x12\x0e\x0a\x06\x44\x6f\x75\x62\x6c\x65\x18\x01\x20\x01\x28\x01\x12\x0d\x0a\x05\x46\x6c\x6f\x61\x74\x18\x02\x20\x01\x28\x02\x12"
+ "\x0d\x0a\x05\x49\x6e\x74\x36\x34\x18\x03\x20\x01\x28\x03\x12\x0e\x0a\x06\x55\x49\x6e\x74\x36\x34\x18\x04\x20\x01\x28\x04\x12\x0e\x0a\x06\x53\x49"
+ "\x6e\x74\x36\x34\x18\x05\x20\x01\x28\x12\x12\x0f\x0a\x07\x46\x69\x78\x65\x64\x36\x34\x18\x06\x20\x01\x28\x06\x12\x10\x0a\x08\x53\x46\x69\x78\x65"
+ "\x64\x36\x34\x18\x07\x20\x01\x28\x10\x12\x0d\x0a\x05\x49\x6e\x74\x33\x32\x18\x08\x20\x01\x28\x05\x12\x0e\x0a\x06\x55\x49\x6e\x74\x33\x32\x18\x09"
+ "\x20\x01\x28\x0d\x12\x0e\x0a\x06\x53\x49\x6e\x74\x33\x32\x18\x0a\x20\x01\x28\x11\x12\x0f\x0a\x07\x46\x69\x78\x65\x64\x33\x32\x18\x0b\x20\x01\x28"
+ "\x07\x12\x10\x0a\x08\x53\x46\x69\x78\x65\x64\x33\x32\x18\x0c\x20\x01\x28\x0f\x12\x0c\x0a\x04\x42\x6f\x6f\x6c\x18\x0d\x20\x01\x28\x08\x12\x0e\x0a"
+ "\x06\x53\x74\x72\x69\x6e\x67\x18\x0e\x20\x01\x28\x09\x12\x0d\x0a\x05\x42\x79\x74\x65\x73\x18\x0f\x20\x01\x28\x0c\x12\x14\x0a\x04\x45\x6e\x75\x6d"
+ "\x18\x10\x20\x01\x28\x0e\x32\x06\x2e\x45\x45\x6e\x75\x6d\x12\x21\x0a\x07\x4d\x65\x73\x73\x61\x67\x65\x18\x11\x20\x01\x28\x0b\x32\x10\x2e\x54\x45"
+ "\x6d\x62\x65\x64\x65\x64\x4d\x65\x73\x73\x61\x67\x65\x2a\x24\x0a\x05\x45\x45\x6e\x75\x6d\x12\x07\x0a\x03\x4f\x6e\x65\x10\x01\x12\x07\x0a\x03\x54"
+ "\x77\x6f\x10\x02\x12\x09\x0a\x05\x54\x68\x72\x65\x65\x10\x03";
+
+// Builds one lenval record: the raw bytes of |size| (a ui32 length header)
+// followed by exactly |size| bytes drawn from |rng|.
+TString GenerateRandomLenvalString(TFastRng64& rng, ui32 size)
+{
+    TString record;
+    record.append(reinterpret_cast<const char*>(&size), sizeof(size));
+
+    // Total record length: header plus payload (same ui32 arithmetic as the header type).
+    const ui32 recordSize = static_cast<ui32>(size + sizeof(ui32));
+
+    // Random data arrives in 8-byte chunks, so the last chunk may overshoot.
+    while (record.size() < recordSize) {
+        const ui64 chunk = rng.GenRand();
+        record.append(reinterpret_cast<const char*>(&chunk), sizeof(chunk));
+    }
+
+    // Trim the overshoot, if any.
+    if (record.size() != recordSize) {
+        record.resize(recordSize);
+    }
+    return record;
+}
+
+// Builds a protobuf format config for the given message descriptors.
+// The config carries a text-serialized FileDescriptorSet (the files of all
+// descriptors plus their transitive dependencies) and the full type names.
+static TProtobufFormatConfigPtr MakeProtobufFormatConfig(const std::vector<const ::google::protobuf::Descriptor*>& descriptorList)
+{
+    ::google::protobuf::FileDescriptorSet fileDescriptorSet;
+    THashSet<const ::google::protobuf::FileDescriptor*> visitedFiles;
+
+    // N.B. We want to write dependencies in fileDescriptorSet in topological order,
+    // so we traverse dependencies first and then add the current file descriptor.
+    std::function<void(const ::google::protobuf::FileDescriptor*)> visitFile;
+    visitFile = [&] (const ::google::protobuf::FileDescriptor* file) {
+        const bool firstVisit = visitedFiles.insert(file).second;
+        if (!firstVisit) {
+            return;
+        }
+
+        const int dependencyCount = file->dependency_count();
+        for (int index = 0; index < dependencyCount; ++index) {
+            visitFile(file->dependency(index));
+        }
+        file->CopyTo(fileDescriptorSet.add_file());
+    };
+
+    std::vector<TString> typeNames;
+    for (const auto* descriptor : descriptorList) {
+        visitFile(descriptor->file());
+        typeNames.push_back(descriptor->full_name());
+    }
+
+    auto configYson = BuildYsonStringFluently()
+        .BeginMap()
+            .Item("file_descriptor_set_text").Value(fileDescriptorSet.ShortDebugString())
+            .Item("type_names").Value(typeNames)
+        .EndMap();
+
+    return ConvertTo<TProtobufFormatConfigPtr>(configYson);
+}
+
+// Parses |data| as YSON and returns the resulting node tree.
+INodePtr ParseYson(TStringBuf data)
+{
+    const NYson::TYsonString ysonString(TString{data});
+    return ConvertToNode(ysonString);
+}
+
+// Serializes |message| as a single lenval record: the raw bytes of its ui32
+// byte size followed by the serialized message. Throws if serialization fails.
+TString LenvalBytes(const ::google::protobuf::Message& message)
+{
+    TStringStream stream;
+
+    const auto payloadSize = static_cast<ui32>(message.ByteSizeLong());
+    stream.Write(&payloadSize, sizeof(payloadSize));
+
+    const bool serialized = message.SerializeToArcadiaStream(&stream);
+    if (!serialized) {
+        THROW_ERROR_EXCEPTION("Can not serialize message");
+    }
+    return stream.Str();
+}
+
+void EnsureTypesMatch(EValueType expected, EValueType actual)
+{
+ if (expected != actual) {
+ THROW_ERROR_EXCEPTION("Mismatching type: expected %Qlv, actual %Qlv",
+ expected,
+ actual);
+ }
+}
+
+// Returns the double payload of the value; throws if the value is not of type Double.
+double GetDouble(const TUnversionedValue& row)
+{
+    EnsureTypesMatch(EValueType::Double, row.Type);
+    const double payload = row.Data.Double;
+    return payload;
+}
+
+// Serializes |message| |count| times as lenval records, feeds the bytes to a
+// protobuf parser created from |config| (table index 0) and returns the
+// collected rows.
+//
+// Throws if serialization fails or if the number of parsed rows differs from
+// |count|.
+template <typename TMessage>
+TCollectingValueConsumer ParseRows(
+    const TMessage& message,
+    const TProtobufFormatConfigPtr& config,
+    const TTableSchemaPtr& schema = New<TTableSchema>(),
+    int count = 1)
+{
+    TString lenvalBytes;
+    TStringOutput out(lenvalBytes);
+    // ByteSizeLong() replaces the deprecated ByteSize(); this also matches LenvalBytes().
+    auto messageSize = static_cast<ui32>(message.ByteSizeLong());
+    for (int i = 0; i < count; ++i) {
+        out.Write(&messageSize, sizeof(messageSize));
+        if (!message.SerializeToArcadiaStream(&out)) {
+            THROW_ERROR_EXCEPTION("Failed to serialize message");
+        }
+    }
+
+    TCollectingValueConsumer rowCollector(schema);
+    auto parser = CreateParserForProtobuf(&rowCollector, config, 0);
+    parser->Read(lenvalBytes);
+    parser->Finish();
+    if (static_cast<ssize_t>(rowCollector.Size()) != count) {
+        THROW_ERROR_EXCEPTION("rowCollector has wrong size: expected %v, actual %v",
+            count,
+            rowCollector.Size());
+    }
+    return rowCollector;
+}
+
+// Convenience overload: takes the format config from the attributes of
+// |config| and forwards to the overload accepting TProtobufFormatConfigPtr.
+template <typename TMessage>
+TCollectingValueConsumer ParseRows(
+    const TMessage& message,
+    const INodePtr& config,
+    const TTableSchemaPtr& schema = New<TTableSchema>(),
+    int count = 1)
+{
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(config->Attributes().ToMap());
+    return ParseRows(message, formatConfig, schema, count);
+}
+
+
+// Appends |fileDescriptor| and all of its transitive dependencies to
+// |fileDescriptors|, dependencies first. |fileDescriptorSet| tracks the files
+// that have already been visited, so each file is appended at most once.
+void AddDependencies(
+    const FileDescriptor* fileDescriptor,
+    std::vector<const FileDescriptor*>& fileDescriptors,
+    THashSet<const FileDescriptor*>& fileDescriptorSet)
+{
+    const bool firstVisit = fileDescriptorSet.insert(fileDescriptor).second;
+    if (!firstVisit) {
+        return;
+    }
+
+    const int dependencyCount = fileDescriptor->dependency_count();
+    for (int index = 0; index < dependencyCount; ++index) {
+        AddDependencies(fileDescriptor->dependency(index), fileDescriptors, fileDescriptorSet);
+    }
+    fileDescriptors.push_back(fileDescriptor);
+}
+
+// Builds a "protobuf" format node whose attributes describe the message types
+// |Ts|: a text-format FileDescriptorSet (the files of |Ts| preceded by their
+// dependencies), the full type names and, if given, the complex type mode.
+template <typename ... Ts>
+INodePtr CreateFileDescriptorConfig(std::optional<EComplexTypeMode> complexTypeMode = {})
+{
+    std::vector<const FileDescriptor*> orderedFiles;
+    THashSet<const FileDescriptor*> visitedFiles;
+    std::vector<const FileDescriptor*> rootFiles = {Ts::descriptor()->file()...};
+    for (const auto* rootFile : rootFiles) {
+        AddDependencies(rootFile, orderedFiles, visitedFiles);
+    }
+
+    ::google::protobuf::FileDescriptorSet fileDescriptorSetProto;
+    for (const auto* file : orderedFiles) {
+        file->CopyTo(fileDescriptorSetProto.add_file());
+    }
+
+    TString fileDescriptorSetText;
+    ::google::protobuf::TextFormat::Printer().PrintToString(fileDescriptorSetProto, &fileDescriptorSetText);
+
+    std::vector<TString> typeNames = {Ts::descriptor()->full_name()...};
+
+    return BuildYsonNodeFluently()
+        .BeginAttributes()
+            .Item("file_descriptor_set_text").Value(fileDescriptorSetText)
+            .Item("type_names").Value(typeNames)
+            .OptionalItem("complex_type_mode", complexTypeMode)
+        .EndAttributes()
+        .Value("protobuf");
+}
+
+// Value of the "enumerations" attribute used by the structured config below:
+// maps the names of the "EEnum" enumeration to their integer values,
+// including negative and int limit values.
+static const auto EnumerationsConfig = BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("EEnum")
+ .BeginMap()
+ .Item("One").Value(1)
+ .Item("Two").Value(2)
+ .Item("Three").Value(3)
+ .Item("MinusFortyTwo").Value(-42)
+ .Item("MaxInt32").Value(std::numeric_limits<int>::max())
+ .Item("MinInt32").Value(std::numeric_limits<int>::min())
+ .EndMap()
+ .EndMap();
+
+// Builds a "protobuf" format node for the requested config flavor:
+//  - FileDescriptor: generated from the compiled-in TMessage descriptor;
+//  - FileDescriptorLegacy: the hardcoded serialized descriptor together with
+//    file and message indices;
+//  - Structured: an explicit column-by-column description of a single table.
+// Aborts if |protoFormatType| holds a value not handled by the switch.
+INodePtr CreateAllFieldsConfig(EProtoFormatType protoFormatType)
+{
+ switch (protoFormatType) {
+ case EProtoFormatType::FileDescriptor:
+ return CreateFileDescriptorConfig<TMessage>();
+ case EProtoFormatType::FileDescriptorLegacy:
+ return BuildYsonNodeFluently()
+ .BeginAttributes()
+ .Item("file_descriptor_set")
+ .Value(FileDescriptorLegacy)
+ .Item("file_indices")
+ .BeginList()
+ .Item().Value(0)
+ .EndList()
+ .Item("message_indices")
+ .BeginList()
+ .Item().Value(1)
+ .EndList()
+ .EndAttributes()
+ .Value("protobuf");
+ case EProtoFormatType::Structured:
+ // One table whose columns cover the scalar proto types plus enum,
+ // message, any and other_columns fields.
+ return BuildYsonNodeFluently()
+ .BeginAttributes()
+ .Item("enumerations").Value(EnumerationsConfig)
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Double")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("double")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Float")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("float")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Int64")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("UInt64")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SInt64")
+ .Item("field_number").Value(5)
+ .Item("proto_type").Value("sint64")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Fixed64")
+ .Item("field_number").Value(6)
+ .Item("proto_type").Value("fixed64")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SFixed64")
+ .Item("field_number").Value(7)
+ .Item("proto_type").Value("sfixed64")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Int32")
+ .Item("field_number").Value(8)
+ .Item("proto_type").Value("int32")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("UInt32")
+ .Item("field_number").Value(9)
+ .Item("proto_type").Value("uint32")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SInt32")
+ .Item("field_number").Value(10)
+ .Item("proto_type").Value("sint32")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Fixed32")
+ .Item("field_number").Value(11)
+ .Item("proto_type").Value("fixed32")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SFixed32")
+ .Item("field_number").Value(12)
+ .Item("proto_type").Value("sfixed32")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Bool")
+ .Item("field_number").Value(13)
+ .Item("proto_type").Value("bool")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("String")
+ .Item("field_number").Value(14)
+ .Item("proto_type").Value("string")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Bytes")
+ .Item("field_number").Value(15)
+ .Item("proto_type").Value("bytes")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Enum")
+ .Item("field_number").Value(16)
+ .Item("proto_type").Value("enum_string")
+ .Item("enumeration_name").Value("EEnum")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Message")
+ .Item("field_number").Value(17)
+ .Item("proto_type").Value("message")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("AnyWithMap")
+ .Item("field_number").Value(18)
+ .Item("proto_type").Value("any")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("AnyWithInt64")
+ .Item("field_number").Value(19)
+ .Item("proto_type").Value("any")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("AnyWithString")
+ .Item("field_number").Value(20)
+ .Item("proto_type").Value("any")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("OtherColumns")
+ .Item("field_number").Value(21)
+ .Item("proto_type").Value("other_columns")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("MissingInt64")
+ .Item("field_number").Value(22)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndAttributes()
+ .Value("protobuf");
+ }
+ // Reached only if no case above returned.
+ Y_ABORT();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// One row extracted from a lenval stream together with the table and tablet
+// indices that were in effect when the row was read.
+struct TLenvalEntry
+{
+    // Raw bytes of the row payload.
+    TString RowData;
+    // Zero-initialized so that a default-constructed entry never carries
+    // indeterminate values.
+    ui32 TableIndex = 0;
+    ui64 TabletIndex = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Minimal pull parser for lenval streams used by the tests.
+//
+// Each record starts with a ui32 that is either a row length or one of the
+// control markers. Table index and tablet index markers update the parser
+// state and are attached to every subsequently returned row; key switch,
+// range index and row index markers are rejected as unsupported.
+class TLenvalParser
+{
+public:
+    // Parses from an externally owned stream; |input| must outlive the parser.
+    explicit TLenvalParser(IInputStream* input)
+        : Input_(input)
+    { }
+
+    // Parses from a memory buffer; the parser owns the wrapping stream, while
+    // the bytes referenced by |input| must stay valid for the parser's lifetime.
+    explicit TLenvalParser(TStringBuf input)
+        : StreamHolder_(std::make_unique<TMemoryInput>(input))
+        , Input_(StreamHolder_.get())
+    { }
+
+    // Returns the next row, or std::nullopt when the input is exhausted or an
+    // explicit end-of-stream marker is met (see IsEndOfStream()).
+    // Throws on truncated input and on unsupported markers.
+    std::optional<TLenvalEntry> Next()
+    {
+        ui32 rowSize;
+        size_t read = Input_->Load(&rowSize, sizeof(rowSize));
+        if (read == 0) {
+            return std::nullopt;
+        } else if (read < sizeof(rowSize)) {
+            THROW_ERROR_EXCEPTION("corrupted lenval: can't read row length");
+        }
+        switch (rowSize) {
+            case LenvalTableIndexMarker: {
+                ui32 tableIndex;
+                read = Input_->Load(&tableIndex, sizeof(tableIndex));
+                if (read != sizeof(tableIndex)) {
+                    THROW_ERROR_EXCEPTION("corrupted lenval: can't read table index");
+                }
+                CurrentTableIndex_ = tableIndex;
+                return Next();
+            }
+            case LenvalTabletIndexMarker: {
+                ui64 tabletIndex;
+                read = Input_->Load(&tabletIndex, sizeof(tabletIndex));
+                if (read != sizeof(tabletIndex)) {
+                    THROW_ERROR_EXCEPTION("corrupted lenval: can't read tablet index");
+                }
+                CurrentTabletIndex_ = tabletIndex;
+                return Next();
+            }
+            case LenvalEndOfStream:
+                EndOfStream_ = true;
+                return std::nullopt;
+            case LenvalKeySwitch:
+            case LenvalRangeIndexMarker:
+            case LenvalRowIndexMarker:
+                THROW_ERROR_EXCEPTION("marker is unsupported");
+            default: {
+                TLenvalEntry result;
+                result.RowData.resize(rowSize);
+                result.TableIndex = CurrentTableIndex_;
+                result.TabletIndex = CurrentTabletIndex_;
+                // A short read means the stream ended in the middle of the row;
+                // report it instead of returning a partially filled payload.
+                read = Input_->Load(result.RowData.Detach(), rowSize);
+                if (read != rowSize) {
+                    THROW_ERROR_EXCEPTION("corrupted lenval: can't read row data");
+                }
+
+                return result;
+            }
+        }
+    }
+
+    // True once an explicit end-of-stream marker has been read.
+    bool IsEndOfStream() const
+    {
+        return EndOfStream_;
+    }
+
+private:
+    // Owns the stream when the parser was constructed from a memory buffer.
+    std::unique_ptr<IInputStream> StreamHolder_;
+    IInputStream* Input_;
+    ui32 CurrentTableIndex_ = 0;
+    ui64 CurrentTabletIndex_ = 0;
+    bool EndOfStream_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+// Converts |node| to a protobuf format config and initializes both the parser
+// and the writer format descriptions with it. When |schemas| is empty, one
+// empty schema per configured table is used. Returns the parsed config.
+TProtobufFormatConfigPtr ParseAndValidateConfig(const INodePtr& node, std::vector<TTableSchemaPtr> schemas = {})
+{
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(node);
+
+    const bool useDefaultSchemas = schemas.empty();
+    if (useDefaultSchemas) {
+        schemas.assign(formatConfig->Tables.size(), New<TTableSchema>());
+    }
+
+    auto parserDescription = New<TProtobufParserFormatDescription>();
+    parserDescription->Init(formatConfig, schemas);
+
+    auto writerDescription = New<TProtobufWriterFormatDescription>();
+    writerDescription->Init(formatConfig, schemas);
+
+    return formatConfig;
+}
+
+} // namespace
+
+// Builds a "protobuf" format node describing a table with embedded messages.
+// For EProtoFormatType::FileDescriptor the config is generated from the
+// compiled-in NYT::TEmbeddingMessage descriptor; for any other format type an
+// explicit structured description is returned. The structured variant nests
+// two levels of "embedded_message" columns named "*" and also contains a
+// oneof, a structured message, a repeated field and an "other_columns" field.
+INodePtr BuildEmbeddedConfig(EComplexTypeMode complexTypeMode, EProtoFormatType formatType) {
+ if (formatType == EProtoFormatType::FileDescriptor) {
+ return CreateFileDescriptorConfig<NYT::TEmbeddingMessage>(complexTypeMode);
+ }
+
+ auto config = BuildYsonNodeFluently()
+ .BeginAttributes()
+ .Item("tables").BeginList()
+ .Item().BeginMap()
+ .Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("*")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("embedded_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("other_columns_field")
+ .Item("field_number").Value(15)
+ .Item("proto_type").Value("other_columns")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("embedded_num")
+ .Item("field_number").Value(10)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("embedded_extra_field")
+ .Item("field_number").Value(11)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("variant")
+ .Item("proto_type").Value("oneof")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("str_variant")
+ .Item("field_number").Value(101)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("uint_variant")
+ .Item("field_number").Value(102)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("*")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("embedded_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("embedded2_num")
+ .Item("field_number").Value(10)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("embedded2_struct")
+ .Item("field_number").Value(17)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("float1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("float")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("string1")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("embedded2_repeated")
+ .Item("field_number").Value(42)
+ .Item("proto_type").Value("string")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("num")
+ .Item("field_number").Value(12)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("extra_field")
+ .Item("field_number").Value(13)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .Item("complex_type_mode").Value(complexTypeMode)
+ .EndAttributes()
+ .Value("protobuf");
+ return config;
+}
+
+// Returns the table schema used together with the embedded-message config.
+// Besides the columns named in that config it also declares "extra_column",
+// "other_complex_field" and "extra_int".
+TTableSchemaPtr BuildEmbeddedSchema()
+{
+    return New<TTableSchema>(std::vector<TColumnSchema>{
+        {"num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
+        {"embedded_num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
+        {"variant", VariantStructLogicalType({
+            {"str_variant", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            {"uint_variant", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
+        })},
+        {"extra_column", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))},
+        {"embedded2_num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
+        {"embedded2_struct", StructLogicalType({
+            {"float1", SimpleLogicalType(ESimpleLogicalValueType::Float)},
+            {"string1", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        })},
+        {"embedded2_repeated", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+        {"other_complex_field", StructLogicalType({
+            {"one", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+            {"two", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+            {"three", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+        })},
+        {"extra_int", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+    });
+}
+
+TEST(TProtobufFormat, TestConfigParsingEmbedded) {
+    // The structured embedded-message config must parse and validate against
+    // the matching schema without throwing.
+    auto embeddedConfig = BuildEmbeddedConfig(EComplexTypeMode::Positional, EProtoFormatType::Structured);
+    auto embeddedSchema = BuildEmbeddedSchema();
+
+    EXPECT_NO_THROW(
+        ParseAndValidateConfig(embeddedConfig->Attributes().ToMap(), {embeddedSchema}));
+}
+
+TEST(TProtobufFormat, TestConfigParsing) // Runs ParseAndValidateConfig on accepted configs and on a series of malformed ones, checking the error substring of each.
+{
+ // Empty config.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(ParseYson("{}")),
+ "one of \"tables\", \"file_descriptor_set\" and \"file_descriptor_set_text\" must be specified");
+
+ // Broken protobuf.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(ParseYson(R"({file_descriptor_set="dfgxx"; file_indices=[0]; message_indices=[0]})")),
+ "Error parsing \"file_descriptor_set\" in protobuf config");
+
+ EXPECT_NO_THROW(ParseAndValidateConfig( // The all-fields config must be accepted for each of the three format types.
+ CreateAllFieldsConfig(EProtoFormatType::Structured)->Attributes().ToMap()));
+
+ EXPECT_NO_THROW(ParseAndValidateConfig(
+ CreateAllFieldsConfig(EProtoFormatType::FileDescriptorLegacy)->Attributes().ToMap()));
+
+ EXPECT_NO_THROW(ParseAndValidateConfig(
+ CreateAllFieldsConfig(EProtoFormatType::FileDescriptor)->Attributes().ToMap()));
+
+ auto embeddedInsideNonembeddedConfig = BuildYsonNodeFluently() // Nesting: embedded_message -> structured_message -> embedded_message.
+ .BeginMap()
+ .Item("tables").BeginList()
+ .Item().BeginMap()
+ .Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("embedded_message1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("embedded_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("field1")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("embedded_message2")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("embedded_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("field2")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ auto schemaForEmbedded = New<TTableSchema>(std::vector{
+ TColumnSchema("field1", StructLogicalType({
+ {"embedded_message2", StructLogicalType({
+ {"field2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ })},
+ }))
+ });
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(embeddedInsideNonembeddedConfig, {schemaForEmbedded}),
+ "embedded_message inside of structured_message is not allowed");
+
+ auto repeatedEmbeddedConfig = BuildYsonNodeFluently() // A column of type embedded_message that is marked "repeated".
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("*")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("embedded_message")
+ .Item("repeated").Value(true)
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("field1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(repeatedEmbeddedConfig),
+ R"(type "embedded_message" can not be repeated)");
+
+ auto multipleOtherColumnsConfig = BuildYsonNodeFluently() // Two columns with proto_type "other_columns".
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Other1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("other_columns")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("Other2")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("other_columns")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(multipleOtherColumnsConfig),
+ "Multiple \"other_columns\" in protobuf config are not allowed");
+
+ auto duplicateColumnNamesConfig = BuildYsonNodeFluently() // Two columns that share the name "SomeColumn".
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(duplicateColumnNamesConfig),
+ "Multiple fields with same column name \"SomeColumn\" are forbidden in protobuf format");
+
+ auto anyCorrespondsToStruct = BuildYsonNodeFluently() // proto_type "any" for a column whose schema type (below) is a struct.
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("any")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ auto schema = New<TTableSchema>(std::vector{
+ TColumnSchema("SomeColumn", StructLogicalType({})),
+ });
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(anyCorrespondsToStruct, {schema}),
+ "Table schema and protobuf format config mismatch");
+
+ auto configWithBytes = BuildYsonNodeFluently() // proto_type "bytes" for a column whose schema type (below) is utf8.
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("bytes")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ auto schemaWithUtf8 = New<TTableSchema>(std::vector{
+ TColumnSchema("SomeColumn", SimpleLogicalType(ESimpleLogicalValueType::Utf8)),
+ });
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(configWithBytes, {schemaWithUtf8}),
+ "mismatch: expected logical type to be one of");
+
+ auto configWithPackedNonRepeated = BuildYsonNodeFluently() // "packed" is set while "repeated" is not.
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .Item("packed").Value(true)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ auto schemaWithInt64List = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"SomeColumn", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+ });
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(configWithPackedNonRepeated, {schemaWithInt64List}),
+ "Field \"SomeColumn\" is marked \"packed\" but is not marked \"repeated\"");
+
+ auto configWithPackedRepeatedString = BuildYsonNodeFluently() // "packed" and "repeated" are both set on a string field.
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .Item("packed").Value(true)
+ .Item("repeated").Value(true)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ auto schemaWithStringList = New<TTableSchema>(std::vector{
+ TColumnSchema("SomeColumn", ListLogicalType(
+ SimpleLogicalType(ESimpleLogicalValueType::String)))
+ });
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(configWithPackedRepeatedString, {schemaWithStringList}),
+ "packed protobuf field must have primitive numeric type, got \"string\"");
+
+ auto configWithMissingFieldNumber = BuildYsonNodeFluently() // The column has no "field_number" item.
+ .BeginMap()
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("SomeColumn")
+ .Item("proto_type").Value("string")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseAndValidateConfig(configWithMissingFieldNumber, {schemaWithStringList}),
+ "\"field_number\" is required");
+}
+
+// Parses a message whose int32_field holds the smallest i32 and checks that the
+// "Int32" column of the first collected row carries the same value.
+TEST(TProtobufFormat, TestParseBigZigZag)
+{
+    constexpr i32 minInt32 = Min<i32>();
+
+    TMessage protoMessage;
+    protoMessage.set_int32_field(minInt32);
+
+    auto allFieldsNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(allFieldsNode->Attributes().ToMap());
+
+    auto parsedRows = ParseRows(protoMessage, formatConfig);
+    EXPECT_EQ(GetInt64(parsedRows.GetRowValue(0, "Int32")), minInt32);
+}
+
+// For each listed enum value, parses a message carrying it and checks that the
+// "Enum" column of the first collected row holds the matching name.
+TEST(TProtobufFormat, TestParseEnumerationString)
+{
+    struct TEnumCase
+    {
+        EEnum Value;
+        const char* ExpectedName;
+    };
+    const TEnumCase enumCases[] = {
+        {EEnum::One, "One"},
+        {EEnum::Two, "Two"},
+        {EEnum::Three, "Three"},
+        {EEnum::MinusFortyTwo, "MinusFortyTwo"},
+    };
+
+    auto allFieldsNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(allFieldsNode->Attributes().ToMap());
+
+    for (const auto& enumCase : enumCases) {
+        TMessage protoMessage;
+        protoMessage.set_enum_field(enumCase.Value);
+        auto parsedRows = ParseRows(protoMessage, formatConfig);
+        EXPECT_EQ(GetString(parsedRows.GetRowValue(0, "Enum")), enumCase.ExpectedName);
+    }
+}
+
+// Puts the raw varint 30 under the field number of enum_field (through the
+// unknown-fields set) and expects parsing with the all-fields config to throw.
+TEST(TProtobufFormat, TestParseWrongEnumeration)
+{
+    auto allFieldsNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(allFieldsNode->Attributes().ToMap());
+
+    TMessage protoMessage;
+    const auto enumFieldNumber = TMessage::descriptor()->FindFieldByName("enum_field")->number();
+    protoMessage.mutable_unknown_fields()->AddVarint(enumFieldNumber, 30);
+
+    EXPECT_ANY_THROW(ParseRows(protoMessage, formatConfig));
+}
+
+// Feeds five messages to a parser whose only column is an "enum_int" field and
+// checks the integer stored in the "Enum" column of each collected row. The
+// last message carries 100500 as a raw varint under the enum field number.
+TEST(TProtobufFormat, TestParseEnumerationInt)
+{
+    TCollectingValueConsumer rowCollector;
+
+    auto configNode = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("Enum")
+                            .Item("field_number").Value(16)
+                            .Item("proto_type").Value("enum_int")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    auto parser = CreateParserForProtobuf(
+        &rowCollector,
+        ConvertTo<TProtobufFormatConfigPtr>(configNode),
+        0);
+
+    // Values set through the generated setter.
+    const EEnum knownValues[] = {
+        EEnum::One,
+        EEnum::Two,
+        EEnum::Three,
+        EEnum::MinusFortyTwo,
+    };
+    for (const auto knownValue : knownValues) {
+        TMessage protoMessage;
+        protoMessage.set_enum_field(knownValue);
+        parser->Read(LenvalBytes(protoMessage));
+    }
+
+    // A value written directly into the unknown-fields set.
+    {
+        TMessage protoMessage;
+        const auto enumFieldNumber = TMessage::descriptor()->FindFieldByName("enum_field")->number();
+        protoMessage.mutable_unknown_fields()->AddVarint(enumFieldNumber, 100500);
+        parser->Read(LenvalBytes(protoMessage));
+    }
+
+    parser->Finish();
+
+    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(0, "Enum")), 1);
+    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(1, "Enum")), 2);
+    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(2, "Enum")), 3);
+    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(3, "Enum")), -42);
+    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(4, "Enum")), 100500);
+}
+
+// Smoke test: parsing randomly generated lenval input must never crash.
+// Exceptions thrown by the parser are an acceptable outcome and are ignored.
+TEST(TProtobufFormat, TestParseRandomGarbage)
+{
+    constexpr int iterationCount = 1000;
+
+    TFastRng64 rng(42);
+    for (int iteration = 0; iteration < iterationCount; ++iteration) {
+        auto garbage = GenerateRandomLenvalString(rng, 8);
+
+        TCollectingValueConsumer rowCollector;
+        auto allFieldsNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
+        auto parser = CreateParserForProtobuf(
+            &rowCollector,
+            ConvertTo<TProtobufFormatConfigPtr>(allFieldsNode->Attributes().ToMap()),
+            0);
+
+        try {
+            parser->Read(garbage);
+            parser->Finish();
+        } catch (...) {
+            // Deliberately swallowed: only a crash counts as a failure here.
+        }
+    }
+}
+
+// With an empty column list, two empty lenval records must produce two rows
+// that contain no values.
+TEST(TProtobufFormat, TestParseZeroColumns)
+{
+    auto configNode = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    TCollectingValueConsumer rowCollector;
+    auto parser = CreateParserForProtobuf(
+        &rowCollector,
+        ConvertTo<TProtobufFormatConfigPtr>(configNode),
+        0);
+
+    // Each Read call passes four zero bytes (an empty lenval value).
+    for (int recordIndex = 0; recordIndex < 2; ++recordIndex) {
+        parser->Read("\0\0\0\0"sv);
+    }
+    parser->Finish();
+
+    ASSERT_EQ(static_cast<ssize_t>(rowCollector.Size()), 2);
+    EXPECT_EQ(static_cast<int>(rowCollector.GetRow(0).GetCount()), 0);
+    EXPECT_EQ(static_cast<int>(rowCollector.GetRow(1).GetCount()), 0);
+}
+
+// Writes two rows whose "Enum" column holds an enum name, then parses the
+// lenval output back into TMessage and checks enum_field of each message.
+TEST(TProtobufFormat, TestWriteEnumerationString)
+{
+    auto allFieldsNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
+
+    auto nameTable = New<TNameTable>();
+
+    TString output;
+    TStringOutput outputStream(output);
+    auto writer = CreateWriterForProtobuf(
+        allFieldsNode->Attributes(),
+        {New<TTableSchema>()},
+        nameTable,
+        CreateAsyncAdapter(&outputStream),
+        true,
+        New<TControlAttributesConfig>(),
+        0);
+
+    EXPECT_EQ(true, writer->Write({
+        MakeRow(nameTable, {
+            {"Enum", "MinusFortyTwo"}
+        }).Get()
+    }));
+    EXPECT_EQ(true, writer->Write({
+        MakeRow(nameTable, {
+            {"Enum", "Three"},
+        }).Get()
+    }));
+
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput input(output);
+    TLenvalParser lenvalParser(&input);
+
+    // Rows are expected back in the order they were written.
+    const NYT::EEnum expectedValues[] = {
+        NYT::EEnum::MinusFortyTwo,
+        NYT::EEnum::Three,
+    };
+    for (const auto expectedValue : expectedValues) {
+        auto row = lenvalParser.Next();
+        ASSERT_TRUE(row);
+        NYT::TMessage parsedMessage;
+        ASSERT_TRUE(parsedMessage.ParseFromString(row->RowData));
+        ASSERT_EQ(parsedMessage.enum_field(), expectedValue);
+    }
+
+    // Nothing may follow the two written rows.
+    auto trailingRow = lenvalParser.Next();
+    ASSERT_FALSE(trailingRow);
+}
+
+// Writes integer values into a single "enum_int" column and reads them back as
+// TMessage::enum_field. Values equal to the i32 limits must round-trip; values
+// one step outside the i32 range must make the write path throw TErrorException.
+TEST(TProtobufFormat, TestWriteEnumerationInt)
+{
+    auto formatNode = BuildYsonNodeFluently()
+        .BeginAttributes()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("Enum")
+                            .Item("field_number").Value(16)
+                            .Item("proto_type").Value("enum_int")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndAttributes()
+        .Value("protobuf");
+
+    auto nameTable = New<TNameTable>();
+
+    // Writes one row with a fresh writer, then parses the single resulting
+    // lenval record into *parsedMessage.
+    auto writeAndParseRow = [&] (TUnversionedRow row, TMessage* parsedMessage) {
+        TString output;
+        TStringOutput outputStream(output);
+        auto writer = CreateWriterForProtobuf(
+            formatNode->Attributes(),
+            {New<TTableSchema>()},
+            nameTable,
+            CreateAsyncAdapter(&outputStream),
+            true,
+            New<TControlAttributesConfig>(),
+            0);
+        Y_UNUSED(writer->Write({row}));
+        writer->Close()
+            .Get()
+            .ThrowOnError();
+
+        TStringInput input(output);
+        TLenvalParser lenvalParser(&input);
+
+        auto firstRecord = lenvalParser.Next();
+        ASSERT_TRUE(firstRecord);
+        ASSERT_TRUE(parsedMessage->ParseFromString(firstRecord->RowData));
+
+        auto secondRecord = lenvalParser.Next();
+        ASSERT_FALSE(secondRecord);
+    };
+
+    // Values that are expected to round-trip.
+    {
+        TMessage parsed;
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", -42},
+            }).Get(),
+            &parsed);
+        ASSERT_EQ(parsed.enum_field(), EEnum::MinusFortyTwo);
+    }
+    {
+        TMessage parsed;
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", static_cast<ui64>(std::numeric_limits<i32>::max())},
+            }).Get(),
+            &parsed);
+        ASSERT_EQ(parsed.enum_field(), EEnum::MaxInt32);
+    }
+    {
+        TMessage parsed;
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", std::numeric_limits<i32>::max()},
+            }).Get(),
+            &parsed);
+        ASSERT_EQ(parsed.enum_field(), EEnum::MaxInt32);
+    }
+    {
+        TMessage parsed;
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", std::numeric_limits<i32>::min()},
+            }).Get(),
+            &parsed);
+        ASSERT_EQ(parsed.enum_field(), EEnum::MinInt32);
+    }
+
+    // Values one step outside the i32 range.
+    TMessage unusedMessage;
+    ASSERT_THROW(
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", static_cast<i64>(std::numeric_limits<i32>::max()) + 1},
+            }).Get(),
+            &unusedMessage),
+        TErrorException);
+
+    ASSERT_THROW(
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", static_cast<i64>(std::numeric_limits<i32>::min()) - 1},
+            }).Get(),
+            &unusedMessage),
+        TErrorException);
+
+    ASSERT_THROW(
+        writeAndParseRow(
+            MakeRow(nameTable, {
+                {"Enum", static_cast<ui64>(std::numeric_limits<i32>::max()) + 1},
+            }).Get(),
+            &unusedMessage),
+        TErrorException);
+}
+
+
+// With an empty column list, each written row (with or without values) must be
+// emitted as four zero bytes; two rows therefore yield eight zero bytes.
+TEST(TProtobufFormat, TestWriteZeroColumns)
+{
+    auto formatNode = BuildYsonNodeFluently()
+        .BeginAttributes()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndAttributes()
+        .Value("protobuf");
+
+    auto nameTable = New<TNameTable>();
+
+    TString output;
+    TStringOutput outputStream(output);
+    auto writer = CreateWriterForProtobuf(
+        formatNode->Attributes(),
+        {New<TTableSchema>()},
+        nameTable,
+        CreateAsyncAdapter(&outputStream),
+        true,
+        New<TControlAttributesConfig>(),
+        0);
+
+    // First row: two values, neither of which has a column in the config.
+    EXPECT_EQ(true, writer->Write({
+        MakeRow(nameTable, {
+            {"Int64", -1},
+            {"String", "this_is_string"},
+        }).Get()
+    }));
+    // Second row: no values at all.
+    EXPECT_EQ(true, writer->Write({MakeRow(nameTable, { }).Get()}));
+
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    ASSERT_EQ(output, "\0\0\0\0\0\0\0\0"sv);
+}
+
+// With EnableTabletIndex switched on, writes two rows that carry a tablet index
+// value and checks that the lenval parser reports the same tablet index and
+// int64_field for each of them.
+TEST(TProtobufFormat, TestTabletIndex)
+{
+    auto configNode = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("int64_field")
+                            .Item("field_number").Value(3)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(configNode);
+
+    auto nameTable = New<TNameTable>();
+
+    TString output;
+    TStringOutput outputStream(output);
+    auto controlAttributes = New<TControlAttributesConfig>();
+    controlAttributes->EnableTabletIndex = true;
+
+    auto writer = CreateWriterForProtobuf(
+        formatConfig,
+        {New<TTableSchema>()},
+        nameTable,
+        CreateAsyncAdapter(&outputStream),
+        true,
+        controlAttributes,
+        0);
+
+    EXPECT_EQ(true, writer->Write({
+        MakeRow(nameTable, {
+            {TString(TabletIndexColumnName), 1LL << 50},
+            {"int64_field", -2345},
+        }).Get(),
+        MakeRow(nameTable, {
+            {TString(TabletIndexColumnName), 12},
+            {"int64_field", 2345},
+        }).Get(),
+    }));
+
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput input(output);
+    TLenvalParser lenvalParser(&input);
+
+    // First row: tablet index 2^50.
+    {
+        auto record = lenvalParser.Next();
+        ASSERT_TRUE(record);
+        ASSERT_EQ(record->TabletIndex, 1ULL << 50);
+        NYT::TMessage parsedMessage;
+        ASSERT_TRUE(parsedMessage.ParseFromString(record->RowData));
+        ASSERT_EQ(parsedMessage.int64_field(), -2345);
+    }
+    // Second row: tablet index 12.
+    {
+        auto record = lenvalParser.Next();
+        ASSERT_TRUE(record);
+        ASSERT_EQ(static_cast<int>(record->TabletIndex), 12);
+        NYT::TMessage parsedMessage;
+        ASSERT_TRUE(parsedMessage.ParseFromString(record->RowData));
+        ASSERT_EQ(parsedMessage.int64_field(), 2345);
+    }
+    // No further rows.
+    {
+        auto record = lenvalParser.Next();
+        ASSERT_FALSE(record);
+    }
+}
+
+// Expects parsing of a message with string_field set, under a config with an
+// empty column list, to throw TErrorException, and checks that the error's
+// "context" attribute contains the string that was put into the message.
+TEST(TProtobufFormat, TestContext)
+{
+    auto config = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    TCollectingValueConsumer rowCollector;
+    auto parser = CreateParserForProtobuf(
+        &rowCollector,
+        ConvertTo<TProtobufFormatConfigPtr>(config),
+        0);
+
+    TString context;
+    try {
+        TMessage message;
+        message.set_string_field("PYSHCH-PYSHCH");
+        parser->Read(LenvalBytes(message));
+        parser->Finish();
+        GTEST_FATAL_FAILURE_("expected to throw");
+    } catch (const NYT::TErrorException& e) {
+        // The lookup result may be empty when the error has no "context"
+        // attribute; fail the test cleanly instead of dereferencing it blindly.
+        auto contextAttribute = e.Error().Attributes().Find<TString>("context");
+        ASSERT_TRUE(contextAttribute);
+        context = *contextAttribute;
+    }
+    ASSERT_NE(context.find("PYSHCH-PYSHCH"), TString::npos);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Builds the table schema used together with the structured-message config:
+// a "first" struct column, a "second" struct column and a set of top-level
+// columns, several of which repeat the types used inside "first".
+// Logical types that occur more than once are built once and reused.
+TTableSchemaPtr CreateSchemaWithStructuredMessage()
+{
+    auto optionalInt32 = OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int32));
+    auto optionalInt64 = OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64));
+    auto int64List = ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64));
+    auto optionalAnyList = ListLogicalType(
+        OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Any)));
+
+    // struct<key: optional<string>, value: optional<string>>
+    auto keyValueStruct = StructLogicalType({
+        {"key", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+        {"value", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+    });
+
+    // Variant with two string alternatives and one key/value alternative.
+    auto oneofVariant = VariantStructLogicalType({
+        {"oneof_string_field_1", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"oneof_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"oneof_message_field", keyValueStruct},
+    });
+
+    // dict<int64, optional<key/value struct>>
+    auto keyValueDict = DictLogicalType(
+        SimpleLogicalType(ESimpleLogicalValueType::Int64),
+        OptionalLogicalType(keyValueStruct));
+
+    // struct<one, two, three: optional<int64>>
+    auto oneTwoThreeStruct = StructLogicalType({
+        {"one", optionalInt64},
+        {"two", optionalInt64},
+        {"three", optionalInt64},
+    });
+
+    auto firstStruct = StructLogicalType({
+        {"field_missing_from_proto1", optionalInt32},
+        {"enum_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+        {"repeated_int64_field", int64List},
+        {"another_repeated_int64_field", int64List},
+        {"message_field", keyValueStruct},
+        {"repeated_message_field", ListLogicalType(keyValueStruct)},
+        {"any_int64_field", optionalInt64},
+        {"any_map_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Any))},
+        {"optional_int64_field", optionalInt64},
+        {"repeated_optional_any_field", optionalAnyList},
+        {"packed_repeated_enum_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+        {"optional_repeated_bool_field", OptionalLogicalType(ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean)))},
+        {"oneof_field", oneofVariant},
+        {"optional_oneof_field", OptionalLogicalType(oneofVariant)},
+        {"map_field", keyValueDict},
+        {"field_missing_from_proto2", optionalInt32},
+    });
+
+    return New<TTableSchema>(std::vector<TColumnSchema>{
+        {"first", firstStruct},
+        {"repeated_int64_field", int64List},
+        {"another_repeated_int64_field", int64List},
+        {"repeated_message_field", ListLogicalType(keyValueStruct)},
+        {"second", oneTwoThreeStruct},
+        {"any_field", SimpleLogicalType(ESimpleLogicalValueType::Any)},
+
+        {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+        {"uint64_field", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
+        {"int32_field", SimpleLogicalType(ESimpleLogicalValueType::Int32)},
+        {"uint32_field", SimpleLogicalType(ESimpleLogicalValueType::Uint32)},
+
+        {"enum_int_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+        {"enum_string_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"enum_string_int64_field", optionalInt64},
+
+        {"repeated_optional_any_field", optionalAnyList},
+
+        {"other_complex_field", oneTwoThreeStruct},
+
+        {"utf8_field", SimpleLogicalType(ESimpleLogicalValueType::Utf8)},
+
+        {"packed_repeated_int64_field", int64List},
+
+        {"optional_repeated_int64_field", OptionalLogicalType(int64List)},
+
+        {"oneof_field", oneofVariant},
+
+        {"optional_oneof_field", OptionalLogicalType(oneofVariant)},
+
+        {"map_field", keyValueDict},
+    });
+}
+
+INodePtr CreateConfigWithStructuredMessage(EComplexTypeMode complexTypeMode, EProtoFormatType formatType) // Returns a protobuf format config node: produced by CreateFileDescriptorConfig for FileDescriptor, built by hand below for Structured.
+{
+ if (formatType == EProtoFormatType::FileDescriptor) {
+ return CreateFileDescriptorConfig<TMessageWithStructuredEmbedded>(complexTypeMode);
+ }
+ YT_VERIFY(formatType == EProtoFormatType::Structured); // No other format type is handled by this helper.
+
+ auto buildOneofConfig = [] (TString prefix, int fieldNumberOffset) { // Builds a "oneof" field description; prefix is prepended to the oneof and alternative names, fieldNumberOffset is added to the alternatives' field numbers.
+ return BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("name").Value(prefix + "oneof_field")
+ .Item("proto_type").Value("oneof")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value(prefix + "oneof_string_field_1")
+ .Item("field_number").Value(101 + fieldNumberOffset)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value(prefix + "oneof_string_field")
+ .Item("field_number").Value(102 + fieldNumberOffset)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value(prefix + "oneof_message_field")
+ .Item("field_number").Value(1000 + fieldNumberOffset)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("key")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("value")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap();
+ };
+ auto oneofConfig = buildOneofConfig("", 0);
+ auto optionalOneofConfig = buildOneofConfig("optional_", 1000);
+
+ auto keyValueFields = BuildYsonStringFluently() // Field list (string "key" = 1, string "value" = 2) inserted below through .Value(keyValueFields).
+ .BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("key")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("value")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList();
+
+ return BuildYsonNodeFluently() // The result is the string node "protobuf" with the whole config stored in its attributes.
+ .BeginAttributes()
+ .Item("enumerations").Value(EnumerationsConfig)
+ .Item("tables")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("columns")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("first")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("int64_field")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("enum_field")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("enum_string")
+ .Item("enumeration_name").Value("EEnum")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("packed_repeated_enum_field")
+ .Item("field_number").Value(11)
+ .Item("proto_type").Value("enum_string")
+ .Item("enumeration_name").Value("EEnum")
+ .Item("repeated").Value(true)
+ .Item("packed").Value(true)
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("message_field")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").Value(keyValueFields)
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("repeated_int64_field")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("another_repeated_int64_field")
+ .Item("field_number").Value(9)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("repeated_message_field")
+ .Item("field_number").Value(5)
+ .Item("proto_type").Value("structured_message")
+ .Item("repeated").Value(true)
+ .Item("fields").Value(keyValueFields)
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("any_int64_field")
+ .Item("field_number").Value(6)
+ .Item("proto_type").Value("any")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("any_map_field")
+ .Item("field_number").Value(7)
+ .Item("proto_type").Value("any")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("optional_int64_field")
+ .Item("field_number").Value(8)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("repeated_optional_any_field")
+ .Item("field_number").Value(10)
+ .Item("proto_type").Value("any")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("optional_repeated_bool_field")
+ .Item("field_number").Value(12)
+ .Item("proto_type").Value("bool")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item().Value(oneofConfig)
+ .Item().Value(optionalOneofConfig)
+ .Item()
+ .BeginMap()
+ .Item("name").Value("map_field")
+ .Item("field_number").Value(13)
+ .Item("proto_type").Value("structured_message")
+ .Item("repeated").Value(true)
+ .Item("fields")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("key")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("value")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").Value(keyValueFields)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("second")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields")
+ .BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("one")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("two")
+ .Item("field_number").Value(500000000)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("three")
+ .Item("field_number").Value(100500)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("repeated_message_field")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("structured_message")
+ .Item("repeated").Value(true)
+ .Item("fields")
+ .BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("key")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("value")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("repeated_int64_field")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("another_repeated_int64_field")
+ .Item("field_number").Value(13)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .EndMap()
+ .Item()
+ .BeginMap()
+ // In schema it is of type "any".
+ .Item("name").Value("any_field")
+ .Item("field_number").Value(5)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ // The next fields are for type casting testing
+ .Item()
+ .BeginMap()
+ // In schema it is of type "int64".
+ .Item("name").Value("int64_field")
+ .Item("field_number").Value(6)
+ .Item("proto_type").Value("int32")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ // In schema it is of type "uint64".
+ .Item("name").Value("uint64_field")
+ .Item("field_number").Value(7)
+ .Item("proto_type").Value("uint32")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ // In schema it is of type "int32".
+ .Item("name").Value("int32_field")
+ .Item("field_number").Value(8)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ // In schema it is of type "uint32".
+ .Item("name").Value("uint32_field")
+ .Item("field_number").Value(9)
+ .Item("proto_type").Value("uint64")
+ .EndMap()
+
+ // Enums.
+ .Item()
+ .BeginMap()
+ .Item("name").Value("enum_int_field")
+ .Item("field_number").Value(10)
+ .Item("proto_type").Value("enum_int")
+ .Item("enumeration_name").Value("EEnum")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("enum_string_string_field")
+ .Item("field_number").Value(11)
+ .Item("proto_type").Value("enum_string")
+ .Item("enumeration_name").Value("EEnum")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("enum_string_int64_field")
+ .Item("field_number").Value(12)
+ .Item("proto_type").Value("enum_string")
+ .Item("enumeration_name").Value("EEnum")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("utf8_field")
+ .Item("field_number").Value(16)
+ .Item("proto_type").Value("string")
+ .EndMap()
+
+ // list<optional<any>>.
+ .Item()
+ .BeginMap()
+ .Item("name").Value("repeated_optional_any_field")
+ .Item("field_number").Value(14)
+ .Item("proto_type").Value("any")
+ .Item("repeated").Value(true)
+ .EndMap()
+
+ // Other columns.
+ .Item()
+ .BeginMap()
+ .Item("name").Value("other_columns_field")
+ .Item("field_number").Value(15)
+ .Item("proto_type").Value("other_columns")
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("packed_repeated_int64_field")
+ .Item("field_number").Value(17)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .Item("packed").Value(true)
+ .EndMap()
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("optional_repeated_int64_field")
+ .Item("field_number").Value(18)
+ .Item("proto_type").Value("int64")
+ .Item("repeated").Value(true)
+ .EndMap()
+
+ .Item().Value(oneofConfig)
+ .Item().Value(optionalOneofConfig)
+
+ .Item()
+ .BeginMap()
+ .Item("name").Value("map_field")
+ .Item("field_number").Value(19)
+ .Item("proto_type").Value("structured_message")
+ .Item("repeated").Value(true)
+ .Item("fields")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("key")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("name").Value("value")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").Value(keyValueFields)
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .Item("complex_type_mode").Value(complexTypeMode) // Taken from the function argument.
+ .EndAttributes()
+ .Value("protobuf");
+}
+
+// Test parameter: (complex type mode, number of rows to write/parse, protobuf format type).
+// The tests below unpack it in exactly this order.
+using TProtobufFormatStructuredMessageParameter = std::tuple<EComplexTypeMode, int, EProtoFormatType>;
+
+// Value-parameterized fixture shared by the structured-message tests below.
+class TProtobufFormatStructuredMessage
+ : public ::testing::TestWithParam<TProtobufFormatStructuredMessageParameter>
+{ };
+
+// One row, positional complex type mode, FileDescriptor format type.
+INSTANTIATE_TEST_SUITE_P(
+ FileDescriptor,
+ TProtobufFormatStructuredMessage,
+ ::testing::Values(TProtobufFormatStructuredMessageParameter{
+ EComplexTypeMode::Positional,
+ 1,
+ EProtoFormatType::FileDescriptor}));
+
+// One row, positional complex type mode, Structured format type.
+INSTANTIATE_TEST_SUITE_P(
+ Positional,
+ TProtobufFormatStructuredMessage,
+ ::testing::Values(TProtobufFormatStructuredMessageParameter{
+ EComplexTypeMode::Positional,
+ 1,
+ EProtoFormatType::Structured}));
+
+// One row, named complex type mode, Structured format type.
+INSTANTIATE_TEST_SUITE_P(
+ Named,
+ TProtobufFormatStructuredMessage,
+ ::testing::Values(TProtobufFormatStructuredMessageParameter{
+ EComplexTypeMode::Named,
+ 1,
+ EProtoFormatType::Structured}));
+
+// Same as Named, but with 30000 rows.
+INSTANTIATE_TEST_SUITE_P(
+ ManyRows,
+ TProtobufFormatStructuredMessage,
+ ::testing::Values(TProtobufFormatStructuredMessageParameter{
+ EComplexTypeMode::Named,
+ 30000,
+ EProtoFormatType::Structured}));
+
+// Writes `rowCount` copies of one row through the protobuf writer configured by
+// BuildEmbeddedSchema()/BuildEmbeddedConfig(), then reads the lenval output back and
+// checks that every entry parses into NYT::TEmbeddingMessage with the expected values.
+TEST_P(TProtobufFormatStructuredMessage, EmbeddedWrite)
+{
+ auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
+
+ // Register every column written below and remember its id.
+ auto nameTable = New<TNameTable>();
+ auto numId = nameTable->RegisterName("num");
+ auto embeddedNumId = nameTable->RegisterName("embedded_num");
+ auto variantId = nameTable->RegisterName("variant");
+ auto embedded2NumId = nameTable->RegisterName("embedded2_num");
+ auto embedded2StructId = nameTable->RegisterName("embedded2_struct");
+ auto embedded2RepeatedId = nameTable->RegisterName("embedded2_repeated");
+ auto extraIntId = nameTable->RegisterName("extra_int");
+ auto otherComplexFieldId = nameTable->RegisterName("other_complex_field");
+
+ //message T2 {
+ // optional ui64 embedded2_num;
+ //};
+ //message T1 {
+ // required T2 t2 [embedded];
+ // optional ui64 embedded_num;
+ //};
+ //
+ //message T {
+ // required T1 t1 [embedded];
+ // optional ui64 num;
+ //};
+
+ auto schema = BuildEmbeddedSchema();
+ auto config = BuildEmbeddedConfig(complexTypeMode, protoFormatType);
+
+ // The writer output is accumulated in `result` via `resultStream`.
+ TString result;
+ TStringOutput resultStream(result);
+ auto writer = CreateWriterForProtobuf(
+ ConvertTo<TProtobufFormatConfigPtr>(config->Attributes()),
+ {schema},
+ nameTable,
+ CreateAsyncAdapter(&resultStream),
+ true,
+ New<TControlAttributesConfig>(),
+ 0);
+
+ // Build the single row that is replicated `rowCount` times below.
+ TUnversionedRowBuilder builder;
+ builder.AddValue(MakeUnversionedUint64Value(789, numId));
+ builder.AddValue(MakeUnversionedUint64Value(123, embeddedNumId));
+ builder.AddValue(MakeUnversionedUint64Value(456, embedded2NumId));
+ // Checked below as: uint_variant is set to 555, str_variant is not set.
+ builder.AddValue(MakeUnversionedCompositeValue("[1; 555u]", variantId));
+ auto embeddedYson = BuildYsonStringFluently()
+ .BeginList()
+ // float1
+ .Item().Value(1.5f)
+ // string1
+ .Item().Value("abc")
+ .EndList();
+ auto embeddedYsonStr = embeddedYson.ToString();
+ builder.AddValue(MakeUnversionedCompositeValue(embeddedYsonStr, embedded2StructId));
+ auto repeatedYsonStr = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value("a")
+ .Item().Value("b")
+ .EndList()
+ .ToString();
+ builder.AddValue(MakeUnversionedCompositeValue(repeatedYsonStr, embedded2RepeatedId));
+ // "extra_int" and "other_complex_field" are read back from other_columns_field() below.
+ builder.AddValue(MakeUnversionedInt64Value(111, extraIntId));
+ auto otherComplexFieldYson = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value(22)
+ .Item().Value(23)
+ .Item().Value(24)
+ .EndList();
+ auto otherComplexFieldYsonStr = otherComplexFieldYson.ToString();
+ builder.AddValue(MakeUnversionedCompositeValue(otherComplexFieldYsonStr, otherComplexFieldId));
+
+
+ auto rows = std::vector<TUnversionedRow>(rowCount, builder.GetRow());
+ EXPECT_EQ(true, writer->Write(rows));
+
+ writer->Close()
+ .Get()
+ .ThrowOnError();
+
+ // Read the serialized output back, one lenval entry per written row.
+ TStringInput input(result);
+ TLenvalParser lenvalParser(&input);
+
+ for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+ auto entry = lenvalParser.Next();
+ ASSERT_TRUE(entry);
+
+ NYT::TEmbeddingMessage message;
+ ASSERT_TRUE(message.ParseFromString(entry->RowData));
+
+ EXPECT_EQ(message.num(), 789UL);
+ EXPECT_EQ(message.t1().embedded_num(), 123UL);
+ EXPECT_EQ(message.t1().t2().embedded2_num(), 456UL);
+
+ EXPECT_FALSE(message.t1().has_str_variant());
+ EXPECT_TRUE(message.t1().has_uint_variant());
+ EXPECT_EQ(message.t1().uint_variant(), 555UL);
+
+ EXPECT_EQ(message.t1().t2().embedded2_struct().float1(), 1.5f);
+ EXPECT_EQ(message.t1().t2().embedded2_struct().string1(), "abc");
+
+ ASSERT_EQ(message.t1().t2().embedded2_repeated_size(), 2);
+ EXPECT_EQ(message.t1().t2().embedded2_repeated(0), "a");
+ EXPECT_EQ(message.t1().t2().embedded2_repeated(1), "b");
+
+ {
+ auto otherColumns = ConvertToNode(TYsonString(message.other_columns_field()))->AsMap();
+ // Plain local copy used inside the lambda below; presumably needed because
+ // structured bindings cannot be captured by lambdas before C++20.
+ auto mode = complexTypeMode;
+ // Expected representation of "other_complex_field": a map with keys one/two/three
+ // in Named mode, the original positional list in Positional mode.
+ auto expected = ([&] {
+ switch (mode) {
+ case EComplexTypeMode::Named:
+ return BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("one").Value(22)
+ .Item("two").Value(23)
+ .Item("three").Value(24)
+ .EndMap();
+ case EComplexTypeMode::Positional:
+ return ConvertToNode(otherComplexFieldYson);
+ }
+ YT_ABORT();
+ })();
+
+ EXPECT_NODES_EQUAL(expected, otherColumns->GetChildOrThrow("other_complex_field"));
+ EXPECT_EQ(ConvertTo<i64>(otherColumns->GetChildOrThrow("extra_int")), 111);
+ }
+
+ ASSERT_FALSE(message.has_extra_field());
+ ASSERT_FALSE(message.t1().has_embedded_extra_field());
+ }
+
+ // The output must contain no entries beyond the `rowCount` checked above.
+ ASSERT_FALSE(lenvalParser.Next());
+}
+
+// Writes `rowCount` copies of one row through the protobuf writer configured by
+// CreateSchemaWithStructuredMessage()/CreateConfigWithStructuredMessage(), then reads the
+// lenval output back and checks that every entry parses into
+// NYT::TMessageWithStructuredEmbedded with the expected values.
+TEST_P(TProtobufFormatStructuredMessage, Write)
+{
+ auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
+
+ // Register every column written below and remember its id.
+ auto nameTable = New<TNameTable>();
+ auto firstId = nameTable->RegisterName("first");
+ auto secondId = nameTable->RegisterName("second");
+ auto repeatedMessageId = nameTable->RegisterName("repeated_message_field");
+ auto repeatedInt64Id = nameTable->RegisterName("repeated_int64_field");
+ auto anotherRepeatedInt64Id = nameTable->RegisterName("another_repeated_int64_field");
+ auto anyFieldId = nameTable->RegisterName("any_field");
+ auto int64FieldId = nameTable->RegisterName("int64_field");
+ auto uint64FieldId = nameTable->RegisterName("uint64_field");
+ auto int32FieldId = nameTable->RegisterName("int32_field");
+ auto uint32FieldId = nameTable->RegisterName("uint32_field");
+ auto enumIntFieldId = nameTable->RegisterName("enum_int_field");
+ auto enumStringStringFieldId = nameTable->RegisterName("enum_string_string_field");
+ auto enumStringInt64FieldId = nameTable->RegisterName("enum_string_int64_field");
+ auto utf8FieldId = nameTable->RegisterName("utf8_field");
+ auto repeatedOptionalAnyFieldId = nameTable->RegisterName("repeated_optional_any_field");
+ auto otherComplexFieldId = nameTable->RegisterName("other_complex_field");
+ auto packedRepeatedInt64FieldId = nameTable->RegisterName("packed_repeated_int64_field");
+ auto optionalRepeatedInt64FieldId = nameTable->RegisterName("optional_repeated_int64_field");
+ auto oneofFieldId = nameTable->RegisterName("oneof_field");
+ auto optionalOneofFieldId = nameTable->RegisterName("optional_oneof_field");
+ auto mapFieldId = nameTable->RegisterName("map_field");
+
+ auto schema = CreateSchemaWithStructuredMessage();
+ auto config = CreateConfigWithStructuredMessage(complexTypeMode, protoFormatType);
+
+ // The writer output is accumulated in `result` via `resultStream`.
+ TString result;
+ TStringOutput resultStream(result);
+ auto writer = CreateWriterForProtobuf(
+ ConvertTo<TProtobufFormatConfigPtr>(config->Attributes()),
+ {schema},
+ nameTable,
+ CreateAsyncAdapter(&resultStream),
+ true,
+ New<TControlAttributesConfig>(),
+ 0);
+
+ // Value of the "first" column, given as a YSON list; the comment before each item
+ // names the member it stands for.
+ auto firstYsonStr = BuildYsonStringFluently()
+ .BeginList()
+ // field_missing_from_proto1
+ .Item().Value(11111)
+ // enum_field
+ .Item().Value("Two")
+ // int64_field
+ .Item().Value(44)
+ // repeated_int64_field
+ .Item()
+ .BeginList()
+ .Item().Value(55)
+ .Item().Value(56)
+ .Item().Value(57)
+ .EndList()
+ // another_repeated_int64_field
+ .Item()
+ .BeginList()
+ .EndList()
+ // message_field
+ .Item()
+ .BeginList()
+ .Item().Value("key")
+ .Item().Value("value")
+ .EndList()
+ // repeated_message_field
+ .Item()
+ .BeginList()
+ .Item()
+ .BeginList()
+ .Item().Value("key1")
+ .Item().Value("value1")
+ .EndList()
+ .Item()
+ .BeginList()
+ .Item().Value("key2")
+ .Item().Value("value2")
+ .EndList()
+ .EndList()
+ // any_int64_field
+ .Item().Value(45)
+ // any_map_field
+ .Item()
+ .BeginMap()
+ .Item("key").Value("value")
+ .EndMap()
+ // optional_int64_field
+ .Item().Entity()
+ // repeated_optional_any_field
+ .Item()
+ .BeginList()
+ .Item().Value(2)
+ .Item().Entity()
+ .Item().Value("foo")
+ .EndList()
+ // packed_repeated_enum_field
+ .Item()
+ .BeginList()
+ .Item().Value("MinusFortyTwo")
+ .Item().Value("Two")
+ .EndList()
+ // optional_repeated_bool_field
+ .Item()
+ .BeginList()
+ .Item().Value(false)
+ .Item().Value(true)
+ .Item().Value(false)
+ .EndList()
+ // oneof_field
+ .Item()
+ .BeginList()
+ // message_field
+ .Item().Value(2)
+ .Item().BeginList()
+ .Item().Value("foo")
+ .Item().Entity()
+ .EndList()
+ .EndList()
+ // optional_oneof_field
+ .Item()
+ .Entity()
+ // map_field
+ .Item()
+ .BeginList()
+ .Item().BeginList()
+ .Item().Value(13)
+ .Item().BeginList()
+ .Item().Value("bac")
+ .Item().Value("cab")
+ .EndList()
+ .EndList()
+ .Item().BeginList()
+ .Item().Value(15)
+ .Item().BeginList()
+ .Item().Value("ya")
+ .Item().Value("make")
+ .EndList()
+ .EndList()
+ .EndList()
+ .EndList()
+ .ToString();
+
+ // Value of the "second" column; checked below via second.one()/two()/three().
+ auto secondYsonStr = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value(101)
+ .Item().Value(102)
+ .Item().Value(103)
+ .EndList()
+ .ToString();
+
+ auto repeatedMessageYsonStr = BuildYsonStringFluently()
+ .BeginList()
+ .Item()
+ .BeginList()
+ .Item().Value("key11")
+ .Item().Value("value11")
+ .EndList()
+ .Item()
+ .BeginList()
+ .Item().Value("key21")
+ .Item().Value("value21")
+ .EndList()
+ .EndList()
+ .ToString();
+
+ auto repeatedInt64Yson = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value(31)
+ .Item().Value(32)
+ .Item().Value(33)
+ .EndList();
+ auto repeatedInt64YsonStr = repeatedInt64Yson.ToString();
+
+ auto anotherRepeatedInt64YsonStr = BuildYsonStringFluently()
+ .BeginList()
+ .EndList()
+ .ToString();
+
+ // Kept as a TYsonString as well: it is compared against the parsed field below.
+ auto repeatedOptionalAnyYson = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value(1)
+ .Item().Value("abc")
+ .Item().Entity()
+ .Item().Value(true)
+ .EndList();
+ auto repeatedOptionalAnyYsonStr = repeatedOptionalAnyYson.ToString();
+
+ // Kept as a TYsonString as well: it is the expected value in Positional mode below.
+ auto otherComplexFieldYson = BuildYsonStringFluently()
+ .BeginList()
+ .Item().Value(22)
+ .Item().Value(23)
+ .Item().Value(24)
+ .EndList();
+ auto otherComplexFieldYsonStr = otherComplexFieldYson.ToString();
+
+ // Build the single row that is replicated `rowCount` times below.
+ TUnversionedRowBuilder builder;
+ builder.AddValue(MakeUnversionedCompositeValue(firstYsonStr, firstId));
+ builder.AddValue(MakeUnversionedCompositeValue(secondYsonStr, secondId));
+ builder.AddValue(MakeUnversionedCompositeValue(repeatedMessageYsonStr, repeatedMessageId));
+ builder.AddValue(MakeUnversionedCompositeValue(repeatedInt64YsonStr, repeatedInt64Id));
+ builder.AddValue(MakeUnversionedCompositeValue(anotherRepeatedInt64YsonStr, anotherRepeatedInt64Id));
+ builder.AddValue(MakeUnversionedInt64Value(4321, anyFieldId));
+
+ // These four values come back through the protobuf accessors of the opposite width
+ // (see "Note the reversal of 32 <-> 64" in the checks below).
+ builder.AddValue(MakeUnversionedInt64Value(-64, int64FieldId));
+ builder.AddValue(MakeUnversionedUint64Value(64, uint64FieldId));
+ builder.AddValue(MakeUnversionedInt64Value(-32, int32FieldId));
+ builder.AddValue(MakeUnversionedUint64Value(32, uint32FieldId));
+
+ builder.AddValue(MakeUnversionedInt64Value(-42, enumIntFieldId));
+ builder.AddValue(MakeUnversionedStringValue("Three", enumStringStringFieldId));
+ builder.AddValue(MakeUnversionedInt64Value(1, enumStringInt64FieldId));
+
+ // Non-ASCII payload given as escaped bytes.
+ const auto HelloWorldInRussian = "\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82, \xd0\xbc\xd0\xb8\xd1\x80!";
+ builder.AddValue(MakeUnversionedStringValue(HelloWorldInRussian, utf8FieldId));
+
+ builder.AddValue(MakeUnversionedCompositeValue(repeatedOptionalAnyYsonStr, repeatedOptionalAnyFieldId));
+
+ builder.AddValue(MakeUnversionedCompositeValue(otherComplexFieldYsonStr, otherComplexFieldId));
+
+ builder.AddValue(MakeUnversionedCompositeValue("[12;-10;123456789000;]", packedRepeatedInt64FieldId));
+
+ builder.AddValue(MakeUnversionedCompositeValue("[1;2;3]", optionalRepeatedInt64FieldId));
+
+ // Checked below as: oneof_string_field_1 is set to "foobaz", the other alternatives are not set.
+ builder.AddValue(MakeUnversionedCompositeValue("[0; foobaz]", oneofFieldId));
+ builder.AddValue(MakeUnversionedNullValue(optionalOneofFieldId));
+
+ builder.AddValue(MakeUnversionedCompositeValue("[[2; [x; y]]; [5; [z; w]]]", mapFieldId));
+
+ auto rows = std::vector<TUnversionedRow>(rowCount, builder.GetRow());
+ EXPECT_EQ(true, writer->Write(rows));
+
+ writer->Close()
+ .Get()
+ .ThrowOnError();
+
+ // Read the serialized output back, one lenval entry per written row.
+ TStringInput input(result);
+ TLenvalParser lenvalParser(&input);
+
+ for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+ auto entry = lenvalParser.Next();
+ ASSERT_TRUE(entry);
+
+ NYT::TMessageWithStructuredEmbedded message;
+ ASSERT_TRUE(message.ParseFromString(entry->RowData));
+
+ // Checks for the nested "first" message.
+ const auto& first = message.first();
+ EXPECT_EQ(first.enum_field(), EEnum::Two);
+ EXPECT_EQ(first.int64_field(), 44);
+ std::vector<i64> firstRepeatedInt64Field(
+ first.repeated_int64_field().begin(),
+ first.repeated_int64_field().end());
+ EXPECT_EQ(firstRepeatedInt64Field, (std::vector<i64>{55, 56, 57}));
+ std::vector<i64> firstAnotherRepeatedInt64Field(
+ first.another_repeated_int64_field().begin(),
+ first.another_repeated_int64_field().end());
+ EXPECT_EQ(firstAnotherRepeatedInt64Field, (std::vector<i64>{}));
+ EXPECT_EQ(first.message_field().key(), "key");
+ EXPECT_EQ(first.message_field().value(), "value");
+ ASSERT_EQ(first.repeated_message_field_size(), 2);
+ EXPECT_EQ(first.repeated_message_field(0).key(), "key1");
+ EXPECT_EQ(first.repeated_message_field(0).value(), "value1");
+ EXPECT_EQ(first.repeated_message_field(1).key(), "key2");
+ EXPECT_EQ(first.repeated_message_field(1).value(), "value2");
+
+ EXPECT_NODES_EQUAL(
+ ConvertToNode(TYsonString(first.any_int64_field())),
+ BuildYsonNodeFluently().Value(45));
+
+ EXPECT_NODES_EQUAL(
+ ConvertToNode(TYsonString(first.any_map_field())),
+ BuildYsonNodeFluently().BeginMap()
+ .Item("key").Value("value")
+ .EndMap());
+
+ std::vector<TYsonString> firstRepeatedOptionalAnyField(
+ first.repeated_optional_any_field().begin(),
+ first.repeated_optional_any_field().end());
+
+ EXPECT_NODES_EQUAL(
+ ConvertToNode(firstRepeatedOptionalAnyField),
+ BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Value(2)
+ .Item().Entity()
+ .Item().Value("foo")
+ .EndList());
+
+ EXPECT_FALSE(first.has_optional_int64_field());
+
+ std::vector<EEnum> actualFirstPackedRepeatedEnumField;
+ for (auto x : first.packed_repeated_enum_field()) {
+ actualFirstPackedRepeatedEnumField.push_back(static_cast<EEnum>(x));
+ }
+ auto expectedFirstPackedRepeatedEnumField = std::vector<EEnum>{EEnum::MinusFortyTwo, EEnum::Two};
+ EXPECT_EQ(expectedFirstPackedRepeatedEnumField, actualFirstPackedRepeatedEnumField);
+
+ std::vector<bool> firstOptionalRepeatedBoolField(
+ first.optional_repeated_bool_field().begin(),
+ first.optional_repeated_bool_field().end());
+ auto expectedFirstOptionalRepeatedBoolField = std::vector<bool>{false, true, false};
+ EXPECT_EQ(expectedFirstOptionalRepeatedBoolField, firstOptionalRepeatedBoolField);
+
+ EXPECT_FALSE(first.has_oneof_string_field_1());
+ EXPECT_FALSE(first.has_oneof_string_field());
+ EXPECT_TRUE(first.has_oneof_message_field());
+ EXPECT_EQ(first.oneof_message_field().key(), "foo");
+ EXPECT_FALSE(first.oneof_message_field().has_value());
+
+ EXPECT_FALSE(first.has_optional_oneof_string_field_1());
+ EXPECT_FALSE(first.has_optional_oneof_string_field());
+ EXPECT_FALSE(first.has_optional_oneof_message_field());
+
+ EXPECT_EQ(std::ssize(first.map_field()), 2);
+ ASSERT_EQ(static_cast<int>(first.map_field().count(13)), 1);
+ EXPECT_EQ(first.map_field().at(13).key(), "bac");
+ EXPECT_EQ(first.map_field().at(13).value(), "cab");
+ ASSERT_EQ(static_cast<int>(first.map_field().count(15)), 1);
+ EXPECT_EQ(first.map_field().at(15).key(), "ya");
+ EXPECT_EQ(first.map_field().at(15).value(), "make");
+
+ // Checks for the nested "second" message.
+ const auto& second = message.second();
+ EXPECT_EQ(second.one(), 101);
+ EXPECT_EQ(second.two(), 102);
+ EXPECT_EQ(second.three(), 103);
+
+ // Checks for the top-level fields of the message.
+ ASSERT_EQ(message.repeated_message_field_size(), 2);
+ EXPECT_EQ(message.repeated_message_field(0).key(), "key11");
+ EXPECT_EQ(message.repeated_message_field(0).value(), "value11");
+ EXPECT_EQ(message.repeated_message_field(1).key(), "key21");
+ EXPECT_EQ(message.repeated_message_field(1).value(), "value21");
+
+ std::vector<i64> repeatedInt64Field(
+ message.repeated_int64_field().begin(),
+ message.repeated_int64_field().end());
+ EXPECT_EQ(repeatedInt64Field, (std::vector<i64>{31, 32, 33}));
+
+ std::vector<i64> anotherRepeatedInt64Field(
+ message.another_repeated_int64_field().begin(),
+ message.another_repeated_int64_field().end());
+ EXPECT_EQ(anotherRepeatedInt64Field, (std::vector<i64>{}));
+
+ EXPECT_EQ(message.int64_any_field(), 4321);
+
+ // Note the reversal of 32 <-> 64.
+ EXPECT_EQ(message.int32_field(), -64);
+ EXPECT_EQ(message.uint32_field(), 64u);
+ EXPECT_EQ(message.int64_field(), -32);
+ EXPECT_EQ(message.uint64_field(), 32u);
+
+ EXPECT_EQ(message.enum_int_field(), EEnum::MinusFortyTwo);
+ EXPECT_EQ(message.enum_string_string_field(), EEnum::Three);
+ EXPECT_EQ(message.enum_string_int64_field(), EEnum::One);
+
+ EXPECT_EQ(message.utf8_field(), HelloWorldInRussian);
+
+ std::vector<TYsonString> repeatedOptionalAnyField(
+ message.repeated_optional_any_field().begin(),
+ message.repeated_optional_any_field().end());
+ EXPECT_NODES_EQUAL(ConvertToNode(repeatedOptionalAnyField), ConvertToNode(repeatedOptionalAnyYson));
+
+ {
+ auto otherColumns = ConvertToNode(TYsonString(message.other_columns_field()))->AsMap();
+ // Plain local copy used inside the lambda below; presumably needed because
+ // structured bindings cannot be captured by lambdas before C++20.
+ auto mode = complexTypeMode;
+ // Expected representation of "other_complex_field": a map with keys one/two/three
+ // in Named mode, the original positional list in Positional mode.
+ auto expected = ([&] {
+ switch (mode) {
+ case EComplexTypeMode::Named:
+ return BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("one").Value(22)
+ .Item("two").Value(23)
+ .Item("three").Value(24)
+ .EndMap();
+ case EComplexTypeMode::Positional:
+ return ConvertToNode(otherComplexFieldYson);
+ }
+ YT_ABORT();
+ })();
+
+ EXPECT_NODES_EQUAL(expected, otherColumns->GetChildOrThrow("other_complex_field"));
+ }
+
+ std::vector<i64> actualPackedRepeatedInt64Field(
+ message.packed_repeated_int64_field().begin(),
+ message.packed_repeated_int64_field().end());
+ auto expectedPackedRepeatedInt64Field = std::vector<i64>{12, -10, 123456789000LL};
+ EXPECT_EQ(expectedPackedRepeatedInt64Field, actualPackedRepeatedInt64Field);
+
+ std::vector<i64> actualOptionalRepeatedInt64Field(
+ message.optional_repeated_int64_field().begin(),
+ message.optional_repeated_int64_field().end());
+ auto expectedOptionalRepeatedInt64Field = std::vector<i64>{1, 2, 3};
+ EXPECT_EQ(expectedOptionalRepeatedInt64Field, actualOptionalRepeatedInt64Field);
+
+ EXPECT_TRUE(message.has_oneof_string_field_1());
+ EXPECT_EQ(message.oneof_string_field_1(), "foobaz");
+ EXPECT_FALSE(message.has_oneof_string_field());
+ EXPECT_FALSE(message.has_oneof_message_field());
+
+ EXPECT_FALSE(message.has_optional_oneof_string_field_1());
+ EXPECT_FALSE(message.has_optional_oneof_string_field());
+ EXPECT_FALSE(message.has_optional_oneof_message_field());
+
+ EXPECT_EQ(std::ssize(message.map_field()), 2);
+ ASSERT_EQ(static_cast<int>(message.map_field().count(2)), 1);
+ EXPECT_EQ(message.map_field().at(2).key(), "x");
+ EXPECT_EQ(message.map_field().at(2).value(), "y");
+ ASSERT_EQ(static_cast<int>(message.map_field().count(5)), 1);
+ EXPECT_EQ(message.map_field().at(5).key(), "z");
+ EXPECT_EQ(message.map_field().at(5).value(), "w");
+ }
+
+ // The output must contain no entries beyond the `rowCount` checked above.
+ ASSERT_FALSE(lenvalParser.Next());
+}
+
+// Returns a new node holding the [key, value] pairs of `node` ordered by ascending key.
+//
+// `node` must be convertible to std::vector<std::pair<i64, INodePtr>>; the result is that
+// vector converted back to a node after sorting. The input node itself is not modified.
+INodePtr SortMapByKey(const INodePtr& node)
+{
+    auto keyValuePairs = ConvertTo<std::vector<std::pair<i64, INodePtr>>>(node);
+    // Order by key only. The default std::pair comparison would fall back to comparing the
+    // INodePtr members for equal keys, which says nothing about key order; a stable sort
+    // keeps such entries in their original relative order instead.
+    std::stable_sort(
+        keyValuePairs.begin(),
+        keyValuePairs.end(),
+        [] (const auto& lhs, const auto& rhs) {
+            return lhs.first < rhs.first;
+        });
+    return ConvertTo<INodePtr>(keyValuePairs);
+}
+
+// Parses `rowCount` copies of a TEmbeddingMessage using the embedded schema/config and
+// checks the column values collected for every row.
+TEST_P(TProtobufFormatStructuredMessage, EmbeddedParse)
+{
+    const auto& param = GetParam();
+    auto complexTypeMode = std::get<0>(param);
+    auto rowCount = std::get<1>(param);
+    auto protoFormatType = std::get<2>(param);
+
+    auto schema = BuildEmbeddedSchema();
+    auto config = BuildEmbeddedConfig(complexTypeMode, protoFormatType);
+
+    // Source message: top-level `num`, plus the nested t1 and t1.t2 parts.
+    NYT::TEmbeddingMessage message;
+    message.set_num(789);
+
+    auto* outerPart = message.mutable_t1();
+    outerPart->set_embedded_num(123);
+
+    auto* innerPart = outerPart->mutable_t2();
+    innerPart->set_embedded2_num(456);
+
+    outerPart->set_uint_variant(555);
+
+    for (const char* item : {"a", "b", "c"}) {
+        innerPart->add_embedded2_repeated(item);
+    }
+
+    auto* innerStruct = innerPart->mutable_embedded2_struct();
+    innerStruct->set_float1(1.5f);
+    innerStruct->set_string1("abc");
+
+    // Extra fields are not set in this test; the calls are kept commented out.
+    //message.set_extra_field("*");
+    //outerPart->set_embedded_extra_field("*");
+
+    auto rowCollector = ParseRows(message, config, schema, rowCount);
+    for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+        // Scalar columns.
+        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "num")), 789u);
+        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "embedded_num")), 123u);
+        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "embedded2_num")), 456u);
+
+        // Variant column.
+        EXPECT_NODES_EQUAL(
+            GetComposite(rowCollector.GetRowValue(rowIndex, "variant")),
+            ConvertToNode(TYsonString(TStringBuf("[1; 555u]"))));
+
+        // Repeated string column: a list with the three items added above, in order.
+        auto repeatedNode = GetComposite(rowCollector.GetRowValue(rowIndex, "embedded2_repeated"));
+        ASSERT_EQ(repeatedNode->GetType(), ENodeType::List);
+        auto repeatedList = repeatedNode->AsList();
+        ASSERT_EQ(repeatedList->GetChildCount(), 3);
+        const TString expectedItems[] = {"a", "b", "c"};
+        for (int itemIndex = 0; itemIndex < 3; ++itemIndex) {
+            EXPECT_EQ(repeatedList->GetChildValueOrThrow<TString>(itemIndex), expectedItems[itemIndex]);
+        }
+
+        // Struct column: a two-element list [float1, string1].
+        auto structNode = GetComposite(rowCollector.GetRowValue(rowIndex, "embedded2_struct"));
+        ASSERT_EQ(structNode->GetType(), ENodeType::List);
+        auto structList = structNode->AsList();
+        ASSERT_EQ(structList->GetChildCount(), 2);
+        EXPECT_EQ(structList->GetChildValueOrThrow<double>(0), 1.5f);
+        EXPECT_EQ(structList->GetChildValueOrThrow<TString>(1), "abc");
+    }
+}
+
+TEST_P(TProtobufFormatStructuredMessage, Parse)
+{
+ auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
+
+ auto schema = CreateSchemaWithStructuredMessage();
+ auto config = CreateConfigWithStructuredMessage(complexTypeMode, protoFormatType);
+
+ NYT::TMessageWithStructuredEmbedded message;
+
+ auto* first = message.mutable_first();
+ first->set_enum_field(EEnum::Two);
+ first->set_int64_field(44);
+
+ first->add_repeated_int64_field(55);
+ first->add_repeated_int64_field(56);
+ first->add_repeated_int64_field(57);
+
+ // another_repeated_int64_field is intentionally empty.
+
+ first->mutable_message_field()->set_key("key");
+ first->mutable_message_field()->set_value("value");
+ auto* firstSubfield1 = first->add_repeated_message_field();
+ firstSubfield1->set_key("key1");
+ firstSubfield1->set_value("value1");
+ auto* firstSubfield2 = first->add_repeated_message_field();
+ firstSubfield2->set_key("key2");
+ firstSubfield2->set_value("value2");
+
+ first->set_any_int64_field(BuildYsonStringFluently().Value(4422).ToString());
+ first->set_any_map_field(
+ BuildYsonStringFluently()
+ .BeginMap()
+ .Item("key").Value("value")
+ .EndMap()
+ .ToString());
+
+ first->add_repeated_optional_any_field("%false");
+ first->add_repeated_optional_any_field("42");
+ first->add_repeated_optional_any_field("#");
+
+ first->add_packed_repeated_enum_field(EEnum::MaxInt32);
+ first->add_packed_repeated_enum_field(EEnum::MinusFortyTwo);
+
+ // optional_repeated_bool_field is intentionally empty.
+
+ first->mutable_oneof_message_field()->set_key("KEY");
+
+ // optional_oneof_field is intentionally empty.
+
+ (*first->mutable_map_field())[111].set_key("key111");
+ (*first->mutable_map_field())[111].set_value("value111");
+ (*first->mutable_map_field())[222].set_key("key222");
+ (*first->mutable_map_field())[222].set_value("value222");
+
+ auto* second = message.mutable_second();
+ second->set_one(101);
+ second->set_two(102);
+ second->set_three(103);
+
+ message.add_repeated_int64_field(31);
+ message.add_repeated_int64_field(32);
+ message.add_repeated_int64_field(33);
+
+ // another_repeated_int64_field is intentionally empty.
+
+ auto* subfield1 = message.add_repeated_message_field();
+ subfield1->set_key("key11");
+ subfield1->set_value("value11");
+ auto* subfield2 = message.add_repeated_message_field();
+ subfield2->set_key("key21");
+ subfield2->set_value("value21");
+
+ message.set_int64_any_field(4321);
+
+ // Note the reversal of 32 <-> 64.
+ message.set_int64_field(-32);
+ message.set_uint64_field(32);
+ message.set_int32_field(-64);
+ message.set_uint32_field(64);
+
+ // Note that we don't set the "enum_string_int64_field" as it would fail during parsing.
+ message.set_enum_int_field(EEnum::MinusFortyTwo);
+ message.set_enum_string_string_field(EEnum::Three);
+
+ const auto HelloWorldInChinese = "\xe4\xbd\xa0\xe5\xa5\xbd\xef\xbc\x8c\xe4\xb8\x96\xe7\x95\x8c";
+ message.set_utf8_field(HelloWorldInChinese);
+
+ message.add_repeated_optional_any_field("#");
+ message.add_repeated_optional_any_field("1");
+ message.add_repeated_optional_any_field("\"qwe\"");
+ message.add_repeated_optional_any_field("%true");
+
+ auto otherComplexFieldPositional = BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Value(301)
+ .Item().Value(302)
+ .Item().Value(303)
+ .EndList();
+
+ auto mode = complexTypeMode;
+ auto otherComplexField = ([&] {
+ switch (mode) {
+ case EComplexTypeMode::Named:
+ return BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("one").Value(301)
+ .Item("two").Value(302)
+ .Item("three").Value(303)
+ .EndMap();
+ case EComplexTypeMode::Positional:
+ return otherComplexFieldPositional;
+ }
+ YT_ABORT();
+ })();
+ auto otherColumnsYson = BuildYsonStringFluently()
+ .BeginMap()
+ .Item("other_complex_field").Value(otherComplexField)
+ .EndMap();
+ message.set_other_columns_field(otherColumnsYson.ToString());
+
+ message.add_packed_repeated_int64_field(-123456789000LL);
+ message.add_packed_repeated_int64_field(0);
+
+ message.add_optional_repeated_int64_field(-4242);
+
+ // optional_oneof_field is intentionally empty.
+
+ message.set_oneof_string_field("spam");
+
+ (*message.mutable_map_field())[777].set_key("key777");
+ (*message.mutable_map_field())[777].set_value("value777");
+ (*message.mutable_map_field())[888].set_key("key888");
+ (*message.mutable_map_field())[888].set_value("value888");
+
+ auto rowCollector = ParseRows(message, config, schema, rowCount);
+ for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+ auto firstNode = GetComposite(rowCollector.GetRowValue(rowIndex, "first"));
+ ASSERT_EQ(firstNode->GetType(), ENodeType::List);
+ const auto& firstList = firstNode->AsList();
+ ASSERT_EQ(firstList->GetChildCount(), 17);
+
+ EXPECT_EQ(firstList->GetChildOrThrow(0)->GetType(), ENodeType::Entity);
+ EXPECT_EQ(firstList->GetChildValueOrThrow<TString>(1), "Two");
+ EXPECT_EQ(firstList->GetChildValueOrThrow<i64>(2), 44);
+
+ ASSERT_EQ(firstList->GetChildOrThrow(3)->GetType(), ENodeType::List);
+ EXPECT_EQ(ConvertTo<std::vector<i64>>(firstList->GetChildOrThrow(3)), (std::vector<i64>{55, 56, 57}));
+
+ ASSERT_EQ(firstList->GetChildOrThrow(4)->GetType(), ENodeType::List);
+ EXPECT_EQ(ConvertTo<std::vector<i64>>(firstList->GetChildOrThrow(4)), (std::vector<i64>{}));
+
+ ASSERT_EQ(firstList->GetChildOrThrow(5)->GetType(), ENodeType::List);
+ EXPECT_EQ(firstList->GetChildOrThrow(5)->AsList()->GetChildValueOrThrow<TString>(0), "key");
+ EXPECT_EQ(firstList->GetChildOrThrow(5)->AsList()->GetChildValueOrThrow<TString>(1), "value");
+
+ ASSERT_EQ(firstList->GetChildOrThrow(6)->GetType(), ENodeType::List);
+ ASSERT_EQ(firstList->GetChildOrThrow(6)->AsList()->GetChildCount(), 2);
+
+ const auto& firstSubNode1 = firstList->GetChildOrThrow(6)->AsList()->GetChildOrThrow(0);
+ ASSERT_EQ(firstSubNode1->GetType(), ENodeType::List);
+ ASSERT_EQ(firstSubNode1->AsList()->GetChildCount(), 2);
+ EXPECT_EQ(firstSubNode1->AsList()->GetChildValueOrThrow<TString>(0), "key1");
+ EXPECT_EQ(firstSubNode1->AsList()->GetChildValueOrThrow<TString>(1), "value1");
+
+ const auto& firstSubNode2 = firstList->GetChildOrThrow(6)->AsList()->GetChildOrThrow(1);
+ ASSERT_EQ(firstSubNode2->GetType(), ENodeType::List);
+ ASSERT_EQ(firstSubNode2->AsList()->GetChildCount(), 2);
+ EXPECT_EQ(firstSubNode2->AsList()->GetChildValueOrThrow<TString>(0), "key2");
+ EXPECT_EQ(firstSubNode2->AsList()->GetChildValueOrThrow<TString>(1), "value2");
+
+ ASSERT_EQ(firstList->GetChildOrThrow(7)->GetType(), ENodeType::Int64);
+ EXPECT_EQ(firstList->GetChildValueOrThrow<i64>(7), 4422);
+
+ ASSERT_EQ(firstList->GetChildOrThrow(8)->GetType(), ENodeType::Map);
+ EXPECT_NODES_EQUAL(
+ firstList->GetChildOrThrow(8),
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("key").Value("value")
+ .EndMap());
+
+ ASSERT_EQ(firstList->GetChildOrThrow(9)->GetType(), ENodeType::Entity);
+
+ EXPECT_NODES_EQUAL(
+ firstList->GetChildOrThrow(10),
+ BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Value(false)
+ .Item().Value(42)
+ .Item().Entity()
+ .EndList());
+
+ EXPECT_NODES_EQUAL(
+ firstList->GetChildOrThrow(11),
+ BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Value("MaxInt32")
+ .Item().Value("MinusFortyTwo")
+ .EndList());
+
+ // optional_repeated_bool_field.
+ ASSERT_EQ(firstList->GetChildOrThrow(12)->GetType(), ENodeType::Entity);
+
+ // oneof_field.
+ EXPECT_NODES_EQUAL(
+ firstList->GetChildOrThrow(13),
+ BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Value(2)
+ .Item().BeginList()
+ .Item().Value("KEY")
+ .Item().Entity()
+ .EndList()
+ .EndList());
+
+ // optional_oneof_field.
+ ASSERT_EQ(firstList->GetChildOrThrow(14)->GetType(), ENodeType::Entity);
+
+ // map_field.
+ EXPECT_NODES_EQUAL(
+ SortMapByKey(firstList->GetChildOrThrow(15)),
+ BuildYsonNodeFluently()
+ .BeginList()
+ .Item().BeginList()
+ .Item().Value(111)
+ .Item().BeginList()
+ .Item().Value("key111")
+ .Item().Value("value111")
+ .EndList()
+ .EndList()
+ .Item().BeginList()
+ .Item().Value(222)
+ .Item().BeginList()
+ .Item().Value("key222")
+ .Item().Value("value222")
+ .EndList()
+ .EndList()
+ .EndList());
+
+ // field_missing_from_proto2.
+ ASSERT_EQ(firstList->GetChildOrThrow(16)->GetType(), ENodeType::Entity);
+
+ auto secondNode = GetComposite(rowCollector.GetRowValue(rowIndex, "second"));
+ ASSERT_EQ(secondNode->GetType(), ENodeType::List);
+ EXPECT_EQ(ConvertTo<std::vector<i64>>(secondNode), (std::vector<i64>{101, 102, 103}));
+
+ auto repeatedMessageNode = GetComposite(rowCollector.GetRowValue(rowIndex, "repeated_message_field"));
+ ASSERT_EQ(repeatedMessageNode->GetType(), ENodeType::List);
+ ASSERT_EQ(repeatedMessageNode->AsList()->GetChildCount(), 2);
+
+ const auto& subNode1 = repeatedMessageNode->AsList()->GetChildOrThrow(0);
+ ASSERT_EQ(subNode1->GetType(), ENodeType::List);
+ ASSERT_EQ(subNode1->AsList()->GetChildCount(), 2);
+ EXPECT_EQ(subNode1->AsList()->GetChildValueOrThrow<TString>(0), "key11");
+ EXPECT_EQ(subNode1->AsList()->GetChildValueOrThrow<TString>(1), "value11");
+
+ const auto& subNode2 = repeatedMessageNode->AsList()->GetChildOrThrow(1);
+ ASSERT_EQ(subNode2->GetType(), ENodeType::List);
+ ASSERT_EQ(subNode2->AsList()->GetChildCount(), 2);
+ EXPECT_EQ(subNode2->AsList()->GetChildValueOrThrow<TString>(0), "key21");
+ EXPECT_EQ(subNode2->AsList()->GetChildValueOrThrow<TString>(1), "value21");
+
+ auto repeatedInt64Node = GetComposite(rowCollector.GetRowValue(rowIndex, "repeated_int64_field"));
+ EXPECT_EQ(ConvertTo<std::vector<i64>>(repeatedInt64Node), (std::vector<i64>{31, 32, 33}));
+
+ auto anotherRepeatedInt64Node = GetComposite(rowCollector.GetRowValue(rowIndex, "another_repeated_int64_field"));
+ EXPECT_EQ(ConvertTo<std::vector<i64>>(anotherRepeatedInt64Node), (std::vector<i64>{}));
+
+ auto anyValue = rowCollector.GetRowValue(rowIndex, "any_field");
+ ASSERT_EQ(anyValue.Type, EValueType::Int64);
+ EXPECT_EQ(anyValue.Data.Int64, 4321);
+
+ EXPECT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "int64_field")), -64);
+ EXPECT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "uint64_field")), 64u);
+ EXPECT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "int32_field")), -32);
+ EXPECT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "uint32_field")), 32u);
+
+ EXPECT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "enum_int_field")), -42);
+ EXPECT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "enum_string_string_field")), "Three");
+
+ EXPECT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "utf8_field")), HelloWorldInChinese);
+
+ auto repeatedRepeatedOptionalAnyNode = GetComposite(rowCollector.GetRowValue(rowIndex, "repeated_optional_any_field"));
+ auto expectedRepeatedOptionalAnyNode = BuildYsonNodeFluently()
+ .BeginList()
+ .Item().Entity()
+ .Item().Value(1)
+ .Item().Value("qwe")
+ .Item().Value(true)
+ .EndList();
+ EXPECT_NODES_EQUAL(repeatedRepeatedOptionalAnyNode, expectedRepeatedOptionalAnyNode);
+
+ auto actualOtherComplexField = GetComposite(rowCollector.GetRowValue(rowIndex, "other_complex_field"));
+ EXPECT_NODES_EQUAL(actualOtherComplexField, otherComplexFieldPositional);
+
+ EXPECT_NODES_EQUAL(
+ GetComposite(rowCollector.GetRowValue(rowIndex, "packed_repeated_int64_field")),
+ ConvertToNode(TYsonString(TStringBuf("[-123456789000;0]"))));
+
+ EXPECT_NODES_EQUAL(
+ GetComposite(rowCollector.GetRowValue(rowIndex, "optional_repeated_int64_field")),
+ ConvertToNode(TYsonString(TStringBuf("[-4242]"))));
+
+ EXPECT_NODES_EQUAL(
+ GetComposite(rowCollector.GetRowValue(rowIndex, "oneof_field")),
+ ConvertToNode(TYsonString(TStringBuf("[1; \"spam\"]"))));
+
+ EXPECT_FALSE(rowCollector.FindRowValue(rowIndex, "optional_oneof_field"));
+
+ // map_field.
+ EXPECT_NODES_EQUAL(
+ SortMapByKey(GetComposite(rowCollector.GetRowValue(rowIndex, "map_field"))),
+ ConvertToNode(TYsonString(TStringBuf("[[777; [key777; value777]]; [888; [key888; value888]]]"))));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Builds the three table schemas used by the TProtobufFormatSeveralTables tests.
+// The order of the returned schemas matches the order of the tables in the
+// config produced by CreateSeveralTablesConfig.
+std::vector<TTableSchemaPtr> CreateSeveralTablesSchemas()
+{
+    // Table #1: a struct column, a list-of-int64 column and an "any" column.
+    auto embeddedStructType = StructLogicalType({
+        {"enum_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+    });
+    auto firstTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"embedded", std::move(embeddedStructType)},
+        {"repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+        {"any_field", SimpleLogicalType(ESimpleLogicalValueType::Any)},
+    });
+
+    // Table #2: the same two fields as the struct above, but as top-level columns.
+    auto secondTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"enum_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
+        {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+    });
+
+    // Table #3: empty schema.
+    auto thirdTableSchema = New<TTableSchema>();
+
+    return {
+        std::move(firstTableSchema),
+        std::move(secondTableSchema),
+        std::move(thirdTableSchema),
+    };
+}
+
+// Builds the protobuf format description for the three-table tests.
+//
+// For EProtoFormatType::FileDescriptor the config is derived from the three
+// TSeveralTablesMessage* message types. For EProtoFormatType::Structured it is
+// spelled out by hand below. Any other format type trips YT_VERIFY.
+INodePtr CreateSeveralTablesConfig(EProtoFormatType protoFormatType)
+{
+    const bool useFileDescriptor = (protoFormatType == EProtoFormatType::FileDescriptor);
+    if (useFileDescriptor) {
+        return CreateFileDescriptorConfig<
+            TSeveralTablesMessageFirst,
+            TSeveralTablesMessageSecond,
+            TSeveralTablesMessageThird>();
+    }
+    YT_VERIFY(protoFormatType == EProtoFormatType::Structured);
+
+    // The format settings live in the attributes of the "protobuf" string node.
+    auto structuredConfig = BuildYsonNodeFluently()
+        .BeginAttributes()
+            .Item("enumerations").Value(EnumerationsConfig)
+            .Item("tables")
+            .BeginList()
+                // Table #1.
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("embedded")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("structured_message")
+                            .Item("fields")
+                            .BeginList()
+                                .Item()
+                                .BeginMap()
+                                    .Item("name").Value("int64_field")
+                                    .Item("field_number").Value(2)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                                .Item()
+                                .BeginMap()
+                                    .Item("name").Value("enum_field")
+                                    .Item("field_number").Value(1)
+                                    .Item("proto_type").Value("enum_string")
+                                    .Item("enumeration_name").Value("EEnum")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("repeated_int64_field")
+                            .Item("field_number").Value(2)
+                            .Item("proto_type").Value("int64")
+                            .Item("repeated").Value(true)
+                        .EndMap()
+                        .Item()
+                        .BeginMap()
+                            // In schema it is of type "any".
+                            .Item("name").Value("any_field")
+                            .Item("field_number").Value(3)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+
+                // Table #2.
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("int64_field")
+                            .Item("field_number").Value(2)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("enum_field")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("enum_string")
+                            .Item("enumeration_name").Value("EEnum")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+
+                // Table #3.
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("string_field")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("string")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndAttributes()
+        .Value("protobuf");
+    return structuredConfig;
+}
+
+// The only test parameter is the way the protobuf format config is specified.
+using TProtobufFormatSeveralTablesParam = std::tuple<EProtoFormatType>;
+
+// Fixture for the tests that work with several tables at once.
+class TProtobufFormatSeveralTables
+    : public ::testing::TestWithParam<TProtobufFormatSeveralTablesParam>
+{ };
+
+// One instantiation per config flavour, so that each gets its own name prefix.
+INSTANTIATE_TEST_SUITE_P(
+    FileDescriptor,
+    TProtobufFormatSeveralTables,
+    ::testing::Values(
+        TProtobufFormatSeveralTablesParam{EProtoFormatType::FileDescriptor}));
+
+INSTANTIATE_TEST_SUITE_P(
+    Structured,
+    TProtobufFormatSeveralTables,
+    ::testing::Values(
+        TProtobufFormatSeveralTablesParam{EProtoFormatType::Structured}));
+
+// Writes three rows through a single protobuf writer configured with three
+// tables, then parses the lenval output back and checks that each entry
+// decodes as the message type of the corresponding table.
+TEST_P(TProtobufFormatSeveralTables, Write)
+{
+    auto [protoFormatType] = GetParam();
+
+    auto tableSchemas = CreateSeveralTablesSchemas();
+    auto formatNode = CreateSeveralTablesConfig(protoFormatType);
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(formatNode->Attributes().ToMap());
+
+    auto nameTable = New<TNameTable>();
+    auto embeddedId = nameTable->RegisterName("embedded");
+    auto anyFieldId = nameTable->RegisterName("any_field");
+    auto int64FieldId = nameTable->RegisterName("int64_field");
+    auto repeatedInt64Id = nameTable->RegisterName("repeated_int64_field");
+    auto enumFieldId = nameTable->RegisterName("enum_field");
+    auto stringFieldId = nameTable->RegisterName("string_field");
+    auto tableIndexId = nameTable->RegisterName(TableIndexColumnName);
+
+    // Table switches and the end-of-stream marker are both enabled;
+    // the end-of-stream marker is checked at the bottom of the test.
+    auto controlAttributes = New<TControlAttributesConfig>();
+    controlAttributes->EnableTableIndex = true;
+    controlAttributes->EnableEndOfStream = true;
+
+    TString output;
+    TStringOutput outputStream(output);
+    auto writer = CreateWriterForProtobuf(
+        std::move(formatConfig),
+        tableSchemas,
+        nameTable,
+        CreateAsyncAdapter(&outputStream),
+        true,
+        std::move(controlAttributes),
+        0);
+
+    // The composite values below are built from these strings,
+    // so the strings are kept in named locals for the whole test.
+    auto embeddedYson = BuildYsonStringFluently()
+        .BeginList()
+            .Item().Value("Two")
+            .Item().Value(44)
+        .EndList()
+        .ToString();
+    auto repeatedInt64Yson = ConvertToYsonString(std::vector<i64>{31, 32, 33}).ToString();
+
+    {
+        // Row for table #1: no table index value is attached to it.
+        TUnversionedRowBuilder firstRowBuilder;
+        firstRowBuilder.AddValue(MakeUnversionedCompositeValue(embeddedYson, embeddedId));
+        firstRowBuilder.AddValue(MakeUnversionedCompositeValue(repeatedInt64Yson, repeatedInt64Id));
+        firstRowBuilder.AddValue(MakeUnversionedInt64Value(4321, anyFieldId));
+        EXPECT_EQ(true, writer->Write({firstRowBuilder.GetRow()}));
+    }
+    {
+        // Row for table #2 (table index 1).
+        TUnversionedRowBuilder secondRowBuilder;
+        secondRowBuilder.AddValue(MakeUnversionedStringValue("Two", enumFieldId));
+        secondRowBuilder.AddValue(MakeUnversionedInt64Value(999, int64FieldId));
+        secondRowBuilder.AddValue(MakeUnversionedInt64Value(1, tableIndexId));
+        EXPECT_EQ(true, writer->Write({secondRowBuilder.GetRow()}));
+    }
+    {
+        // Row for table #3 (table index 2).
+        TUnversionedRowBuilder thirdRowBuilder;
+        thirdRowBuilder.AddValue(MakeUnversionedStringValue("blah", stringFieldId));
+        thirdRowBuilder.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
+        EXPECT_EQ(true, writer->Write({thirdRowBuilder.GetRow()}));
+    }
+
+    writer->Close().Get().ThrowOnError();
+
+    TStringInput writtenBytes(output);
+    TLenvalParser lenvalParser(&writtenBytes);
+
+    {
+        auto firstEntry = lenvalParser.Next();
+        ASSERT_TRUE(firstEntry);
+
+        NYT::TSeveralTablesMessageFirst firstMessage;
+        ASSERT_TRUE(firstMessage.ParseFromString(firstEntry->RowData));
+
+        const auto& embeddedMessage = firstMessage.embedded();
+        EXPECT_EQ(embeddedMessage.enum_field(), EEnum::Two);
+        EXPECT_EQ(embeddedMessage.int64_field(), 44);
+
+        const auto& repeatedField = firstMessage.repeated_int64_field();
+        std::vector<i64> repeatedValues(repeatedField.begin(), repeatedField.end());
+        EXPECT_EQ(repeatedValues, (std::vector<i64>{31, 32, 33}));
+        EXPECT_EQ(firstMessage.int64_field(), 4321);
+    }
+    {
+        auto secondEntry = lenvalParser.Next();
+        ASSERT_TRUE(secondEntry);
+
+        NYT::TSeveralTablesMessageSecond secondMessage;
+        ASSERT_TRUE(secondMessage.ParseFromString(secondEntry->RowData));
+
+        EXPECT_EQ(secondMessage.enum_field(), EEnum::Two);
+        EXPECT_EQ(secondMessage.int64_field(), 999);
+    }
+    {
+        auto thirdEntry = lenvalParser.Next();
+        ASSERT_TRUE(thirdEntry);
+
+        NYT::TSeveralTablesMessageThird thirdMessage;
+        ASSERT_TRUE(thirdMessage.ParseFromString(thirdEntry->RowData));
+
+        EXPECT_EQ(thirdMessage.string_field(), "blah");
+    }
+
+    // The end-of-stream state is reported only after one more Next() call
+    // that yields no entry; further calls keep yielding no entry.
+    ASSERT_FALSE(lenvalParser.IsEndOfStream());
+    ASSERT_FALSE(lenvalParser.Next());
+    ASSERT_TRUE(lenvalParser.IsEndOfStream());
+    ASSERT_FALSE(lenvalParser.Next());
+}
+
+// Feeds one serialized message to each of three per-table parsers and checks
+// the rows collected for every table.
+TEST_P(TProtobufFormatSeveralTables, Parse)
+{
+    auto [protoFormatType] = GetParam();
+
+    auto tableSchemas = CreateSeveralTablesSchemas();
+    auto formatNode = CreateSeveralTablesConfig(protoFormatType);
+    auto formatConfig = ConvertTo<TProtobufFormatConfigPtr>(formatNode->Attributes().ToMap());
+
+    // All collectors are created first and the parsers only afterwards:
+    // the parsers are given addresses of vector elements, so the vector
+    // must not grow once the first address has been taken.
+    std::vector<TCollectingValueConsumer> rowCollectors;
+    for (const auto& tableSchema : tableSchemas) {
+        rowCollectors.emplace_back(tableSchema);
+    }
+
+    std::vector<std::unique_ptr<IParser>> parsers;
+    for (auto& rowCollector : rowCollectors) {
+        // The number of parsers created so far is the index of the current table.
+        auto tableIndex = static_cast<int>(parsers.size());
+        parsers.push_back(CreateParserForProtobuf(
+            &rowCollector,
+            formatConfig,
+            tableIndex));
+    }
+
+    NYT::TSeveralTablesMessageFirst firstMessage;
+    {
+        auto* embeddedMessage = firstMessage.mutable_embedded();
+        embeddedMessage->set_enum_field(EEnum::Two);
+        embeddedMessage->set_int64_field(44);
+    }
+    firstMessage.add_repeated_int64_field(55);
+    firstMessage.add_repeated_int64_field(56);
+    firstMessage.add_repeated_int64_field(57);
+    firstMessage.set_int64_field(4444);
+
+    NYT::TSeveralTablesMessageSecond secondMessage;
+    secondMessage.set_enum_field(EEnum::Two);
+    secondMessage.set_int64_field(44);
+
+    NYT::TSeveralTablesMessageThird thirdMessage;
+    thirdMessage.set_string_field("blah");
+
+    // Frames the message as <ui32 size><payload>, hands it to the parser
+    // and finishes the parser.
+    auto feedMessage = [] (auto& parser, const auto& message) {
+        TString framedBytes;
+        {
+            TStringOutput framedStream(framedBytes);
+            auto payloadSize = static_cast<ui32>(message.ByteSizeLong());
+            framedStream.Write(&payloadSize, sizeof(payloadSize));
+            ASSERT_TRUE(message.SerializeToArcadiaStream(&framedStream));
+        }
+        parser->Read(framedBytes);
+        parser->Finish();
+    };
+
+    feedMessage(parsers[0], firstMessage);
+    feedMessage(parsers[1], secondMessage);
+    feedMessage(parsers[2], thirdMessage);
+
+    {
+        const auto& firstCollector = rowCollectors[0];
+        ASSERT_EQ(static_cast<int>(firstCollector.Size()), 1);
+
+        auto embeddedNode = GetComposite(firstCollector.GetRowValue(0, "embedded"));
+        ASSERT_EQ(ConvertToTextYson(embeddedNode), "[\"Two\";44;]");
+
+        auto repeatedInt64Node = GetComposite(firstCollector.GetRowValue(0, "repeated_int64_field"));
+        ASSERT_EQ(ConvertToTextYson(repeatedInt64Node), "[55;56;57;]");
+
+        // The message field set via set_int64_field() is read back from column "any_field".
+        auto anyFieldValue = GetInt64(firstCollector.GetRowValue(0, "any_field"));
+        EXPECT_EQ(anyFieldValue, 4444);
+    }
+
+    {
+        const auto& secondCollector = rowCollectors[1];
+        ASSERT_EQ(static_cast<int>(secondCollector.Size()), 1);
+
+        EXPECT_EQ(GetString(secondCollector.GetRowValue(0, "enum_field")), "Two");
+        EXPECT_EQ(GetInt64(secondCollector.GetRowValue(0, "int64_field")), 44);
+    }
+
+    {
+        const auto& thirdCollector = rowCollectors[2];
+        ASSERT_EQ(static_cast<int>(thirdCollector.Size()), 1);
+
+        EXPECT_EQ(GetString(thirdCollector.GetRowValue(0, "string_field")), "blah");
+    }
+}
+
+// Checks which combinations of table schema and protobuf format config are
+// accepted or rejected at the moment a parser or a writer is constructed.
+TEST(TProtobufFormat, SchemaConfigMismatch)
+{
+    // The helpers below only construct a parser or a writer: every expectation
+    // in this test is about whether that construction throws. They return
+    // nothing on purpose. The constructed object is handed pointers to
+    // helper-local objects (the row collector, the output stream), so it must
+    // be destroyed before those locals are, not returned to the caller.
+    auto createParser = [] (const TTableSchemaPtr& schema, const INodePtr& configNode) {
+        TCollectingValueConsumer rowCollector(schema);
+        static_cast<void>(CreateParserForProtobuf(
+            &rowCollector,
+            ConvertTo<TProtobufFormatConfigPtr>(configNode),
+            0));
+    };
+    auto createSeveralTableWriter = [] (const std::vector<TTableSchemaPtr>& schemas, const INodePtr& configNode) {
+        TString result;
+        TStringOutput resultStream(result);
+        static_cast<void>(CreateWriterForProtobuf(
+            ConvertTo<TProtobufFormatConfigPtr>(configNode),
+            schemas,
+            New<TNameTable>(),
+            CreateAsyncAdapter(&resultStream),
+            true,
+            New<TControlAttributesConfig>(),
+            0));
+    };
+    auto createWriter = [&] (const TTableSchemaPtr& schema, const INodePtr& configNode) {
+        createSeveralTableWriter({schema}, configNode);
+    };
+
+    auto schema_struct_with_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"struct", StructLogicalType({
+            {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+        })},
+    });
+
+    auto schema_struct_with_uint64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"struct", StructLogicalType({
+            {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))},
+        })},
+    });
+
+    auto config_struct_with_int64 = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("struct")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("structured_message")
+                            .Item("fields")
+                            .BeginList()
+                                .Item().BeginMap()
+                                    .Item("name").Value("int64_field")
+                                    .Item("field_number").Value(2)
+                                    // Signed type: accepted with the Int64 schema,
+                                    // rejected with the Uint64 schema (see below).
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // OK.
+    EXPECT_NO_THROW(createParser(schema_struct_with_int64, config_struct_with_int64));
+    EXPECT_NO_THROW(createWriter(schema_struct_with_int64, config_struct_with_int64));
+
+    // Types mismatch.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(schema_struct_with_uint64, config_struct_with_int64),
+        "signedness of both types must be the same");
+    EXPECT_THROW_WITH_SUBSTRING(
+        createWriter(schema_struct_with_uint64, config_struct_with_int64),
+        "signedness of both types must be the same");
+
+    // No schema for structured field is Ok.
+    EXPECT_NO_THROW(createParser(New<TTableSchema>(), config_struct_with_int64));
+    EXPECT_NO_THROW(createWriter(New<TTableSchema>(), config_struct_with_int64));
+
+    auto schema_list_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {
+            "repeated",
+            ListLogicalType(
+                SimpleLogicalType(ESimpleLogicalValueType::Int64)),
+        },
+    });
+
+    auto schema_list_optional_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {
+            "repeated",
+            ListLogicalType(
+                OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))),
+        },
+    });
+
+    auto config_repeated_int64 = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("repeated")
+                            .Item("field_number").Value(1)
+                            .Item("repeated").Value(true)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // OK.
+    EXPECT_NO_THROW(createParser(schema_list_int64, config_repeated_int64));
+    EXPECT_NO_THROW(createWriter(schema_list_int64, config_repeated_int64));
+
+    // No schema for repeated field is Ok.
+    EXPECT_NO_THROW(createParser(New<TTableSchema>(), config_repeated_int64));
+    EXPECT_NO_THROW(createWriter(New<TTableSchema>(), config_repeated_int64));
+
+    // List of optional is not allowed.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(schema_list_optional_int64, config_repeated_int64),
+        "unexpected logical metatype \"optional\"");
+    EXPECT_THROW_WITH_SUBSTRING(
+        createWriter(schema_list_optional_int64, config_repeated_int64),
+        "unexpected logical metatype \"optional\"");
+
+    auto schema_optional_list_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"repeated", OptionalLogicalType(
+            ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64)))},
+    });
+
+    // Optional list is OK.
+    EXPECT_NO_THROW(createParser(schema_optional_list_int64, config_repeated_int64));
+    EXPECT_NO_THROW(createWriter(schema_optional_list_int64, config_repeated_int64));
+
+    auto schema_optional_optional_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"field", OptionalLogicalType(
+            OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64)))},
+    });
+
+    auto config_int64 = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("field")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Optional of optional is not allowed.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(schema_optional_optional_int64, config_int64),
+        "unexpected logical metatype \"optional\"");
+    EXPECT_THROW_WITH_SUBSTRING(
+        createWriter(schema_optional_optional_int64, config_int64),
+        "unexpected logical metatype \"optional\"");
+
+    auto schema_struct_with_both = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"struct", StructLogicalType({
+            {"required_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+            {"optional_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+        })},
+    });
+
+    // Config that describes only "required_field" of the struct.
+    auto config_struct_with_required = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("struct")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("structured_message")
+                            .Item("fields")
+                            .BeginList()
+                                .Item().BeginMap()
+                                    .Item("name").Value("required_field")
+                                    .Item("field_number").Value(2)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Config that describes only "optional_field" of the struct.
+    auto config_struct_with_optional = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("struct")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("structured_message")
+                            .Item("fields")
+                            .BeginList()
+                                .Item().BeginMap()
+                                    .Item("name").Value("optional_field")
+                                    .Item("field_number").Value(2)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Config that describes both struct fields plus one the schema does not have.
+    auto config_struct_with_unknown = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("struct")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("structured_message")
+                            .Item("fields")
+                            .BeginList()
+                                .Item().BeginMap()
+                                    .Item("name").Value("required_field")
+                                    .Item("field_number").Value(1)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                                .Item().BeginMap()
+                                    .Item("name").Value("optional_field")
+                                    .Item("field_number").Value(2)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                                .Item().BeginMap()
+                                    .Item("name").Value("unknown_field")
+                                    .Item("field_number").Value(3)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Schema has more fields, non-optional field is missing in protobuf config.
+    // Parser should fail.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(schema_struct_with_both, config_struct_with_optional),
+        "non-optional field \"required_field\" in schema is missing from protobuf config");
+    // Writer feels OK.
+    EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_optional));
+
+    // Schema has more fields, optional field is missing in protobuf config.
+    // It's OK for both the writer and the parser.
+    EXPECT_NO_THROW(createParser(schema_struct_with_both, config_struct_with_required));
+    EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_required));
+
+    // Protobuf config has more fields, it is always OK.
+    EXPECT_NO_THROW(createParser(schema_struct_with_both, config_struct_with_unknown));
+    EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_unknown));
+
+    auto schema_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+    });
+
+    auto config_two_tables = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("int64_field")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("int64_field")
+                            .Item("field_number").Value(1)
+                            .Item("proto_type").Value("int64")
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Fewer schemas than configured tables is fine; more schemas is an error.
+    EXPECT_NO_THROW(createWriter(schema_int64, config_two_tables));
+    EXPECT_THROW_WITH_SUBSTRING(
+        createSeveralTableWriter({schema_int64, schema_int64, schema_int64}, config_two_tables),
+        "Number of schemas is greater than number of tables in protobuf config: 3 > 2");
+
+    auto schema_variant_with_int = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"variant", VariantStructLogicalType({
+            {"a", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+        })},
+    });
+    auto schema_variant_with_optional_int = New<TTableSchema>(std::vector<TColumnSchema>{
+        {"variant", VariantStructLogicalType({
+            {"a", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+        })},
+    });
+
+    auto config_with_oneof = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("tables")
+            .BeginList()
+                .Item()
+                .BeginMap()
+                    .Item("columns")
+                    .BeginList()
+                        .Item()
+                        .BeginMap()
+                            .Item("name").Value("variant")
+                            .Item("proto_type").Value("oneof")
+                            .Item("fields").BeginList()
+                                .Item()
+                                .BeginMap()
+                                    .Item("name").Value("a")
+                                    .Item("field_number").Value(1)
+                                    .Item("proto_type").Value("int64")
+                                .EndMap()
+                            .EndList()
+                        .EndMap()
+                    .EndList()
+                .EndMap()
+            .EndList()
+        .EndMap();
+
+    // Oneof fields require schematized columns.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(New<TTableSchema>(), config_with_oneof),
+        "requires a corresponding schematized column");
+    EXPECT_THROW_WITH_SUBSTRING(
+        createWriter(New<TTableSchema>(), config_with_oneof),
+        "requires a corresponding schematized column");
+
+    // Optional alternatives inside a variant are rejected; plain ones are accepted.
+    EXPECT_THROW_WITH_SUBSTRING(
+        createParser(schema_variant_with_optional_int, config_with_oneof),
+        "Optional variant field \"variant.a\"");
+    EXPECT_THROW_WITH_SUBSTRING(
+        createWriter(schema_variant_with_optional_int, config_with_oneof),
+        "Optional variant field \"variant.a\"");
+    EXPECT_NO_THROW(createParser(schema_variant_with_int, config_with_oneof));
+    EXPECT_NO_THROW(createWriter(schema_variant_with_int, config_with_oneof));
+}
+
+// Writes one row into each of two tables that share the TOtherColumnsMessage
+// descriptor and checks the "other columns" YSON emitted for each row.
+TEST(TProtobufFormat, MultipleOtherColumns)
+{
+    auto nameTable = New<TNameTable>();
+
+    auto controlAttributes = New<TControlAttributesConfig>();
+    controlAttributes->EnableTableIndex = true;
+    controlAttributes->EnableEndOfStream = true;
+
+    TString writtenData;
+    TStringOutput writtenStream(writtenData);
+
+    auto protoWriter = CreateWriterForProtobuf(
+        MakeProtobufFormatConfig({TOtherColumnsMessage::descriptor(), TOtherColumnsMessage::descriptor()}),
+        std::vector<TTableSchemaPtr>(2, New<TTableSchema>()),
+        nameTable,
+        CreateAsyncAdapter(&writtenStream),
+        true,
+        controlAttributes,
+        0);
+
+    // NOTE(review): the rows are deliberately built inside the Write() call.
+    // MakeRow presumably returns owning rows, so hoisting the vector into a
+    // separate statement could leave it referring to destroyed temporaries.
+    // Confirm before restructuring.
+    EXPECT_EQ(true, protoWriter->Write(
+        std::vector<TUnversionedRow>{
+            NNamedValue::MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"field1", "foo"},
+            }),
+            NNamedValue::MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 1},
+                {"field2", "bar"},
+            }),
+        }));
+
+    WaitFor(protoWriter->Close())
+        .ThrowOnError();
+
+    // Collect the canonized other-columns YSON of every written message, in order.
+    std::vector<TString> actualOtherColumns;
+    TLenvalParser lenvalParser(writtenData);
+    for (auto entry = lenvalParser.Next(); entry; entry = lenvalParser.Next()) {
+        TOtherColumnsMessage message;
+        bool parsedOk = message.ParseFromString(entry->RowData);
+        EXPECT_TRUE(parsedOk);
+        actualOtherColumns.push_back(CanonizeYson(message.other_columns_field()));
+    }
+
+    EXPECT_EQ(
+        actualOtherColumns,
+        std::vector<TString>({
+            CanonizeYson("{field1=foo}"),
+            CanonizeYson("{field2=bar}"),
+        }));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Test parameter: (number of rows to write, how the format config is specified).
+using TProtobufFormatAllFieldsParameter = std::tuple<int, EProtoFormatType>;
+
+// Fixture for the tests that exercise a message with fields of every supported kind.
+class TProtobufFormatAllFields
+    : public ::testing::TestWithParam<TProtobufFormatAllFieldsParameter>
+{
+public:
+    // True when the test runs with the legacy file-descriptor config flavour.
+    bool IsLegacyFormat() const
+    {
+        return std::get<1>(GetParam()) == EProtoFormatType::FileDescriptorLegacy;
+    }
+};
+
+// Single-row runs, one per config flavour.
+INSTANTIATE_TEST_SUITE_P(
+    Specification,
+    TProtobufFormatAllFields,
+    ::testing::Values(
+        TProtobufFormatAllFieldsParameter{1, EProtoFormatType::Structured}));
+
+INSTANTIATE_TEST_SUITE_P(
+    FileDescriptorLegacy,
+    TProtobufFormatAllFields,
+    ::testing::Values(
+        TProtobufFormatAllFieldsParameter{1, EProtoFormatType::FileDescriptorLegacy}));
+
+INSTANTIATE_TEST_SUITE_P(
+    FileDescriptor,
+    TProtobufFormatAllFields,
+    ::testing::Values(
+        TProtobufFormatAllFieldsParameter{1, EProtoFormatType::FileDescriptor}));
+
+// Same checks over many identical rows, structured config only.
+INSTANTIATE_TEST_SUITE_P(
+    ManyRows,
+    TProtobufFormatAllFields,
+    ::testing::Values(
+        TProtobufFormatAllFieldsParameter{50000, EProtoFormatType::Structured}));
+
+// Writes `rowCount` copies of one row that carries a value for every column
+// kind, then parses the lenval output and checks every field of every message.
+// The "any" columns and the other-columns checks are skipped for the legacy format.
+TEST_P(TProtobufFormatAllFields, Writer)
+{
+    auto [rowCount, protoFormatType] = GetParam();
+    auto formatNode = CreateAllFieldsConfig(protoFormatType);
+
+    auto nameTable = New<TNameTable>();
+
+    auto doubleId = nameTable->RegisterName("Double");
+    auto floatId = nameTable->RegisterName("Float");
+
+    auto int64Id = nameTable->RegisterName("Int64");
+    auto uint64Id = nameTable->RegisterName("UInt64");
+    auto sint64Id = nameTable->RegisterName("SInt64");
+    auto fixed64Id = nameTable->RegisterName("Fixed64");
+    auto sfixed64Id = nameTable->RegisterName("SFixed64");
+
+    auto int32Id = nameTable->RegisterName("Int32");
+    auto uint32Id = nameTable->RegisterName("UInt32");
+    auto sint32Id = nameTable->RegisterName("SInt32");
+    auto fixed32Id = nameTable->RegisterName("Fixed32");
+    auto sfixed32Id = nameTable->RegisterName("SFixed32");
+
+    auto boolId = nameTable->RegisterName("Bool");
+    auto stringId = nameTable->RegisterName("String");
+    auto bytesId = nameTable->RegisterName("Bytes");
+
+    auto enumId = nameTable->RegisterName("Enum");
+
+    auto messageId = nameTable->RegisterName("Message");
+
+    auto anyWithMapId = nameTable->RegisterName("AnyWithMap");
+    auto anyWithInt64Id = nameTable->RegisterName("AnyWithInt64");
+    auto anyWithStringId = nameTable->RegisterName("AnyWithString");
+
+    auto otherInt64ColumnId = nameTable->RegisterName("OtherInt64Column");
+    auto otherDoubleColumnId = nameTable->RegisterName("OtherDoubleColumn");
+    auto otherStringColumnId = nameTable->RegisterName("OtherStringColumn");
+    auto otherNullColumnId = nameTable->RegisterName("OtherNullColumn");
+    auto otherBooleanColumnId = nameTable->RegisterName("OtherBooleanColumn");
+    auto otherAnyColumnId = nameTable->RegisterName("OtherAnyColumn");
+
+    auto tableIndexColumnId = nameTable->RegisterName(TableIndexColumnName);
+    auto rowIndexColumnId = nameTable->RegisterName(RowIndexColumnName);
+    auto rangeIndexColumnId = nameTable->RegisterName(RangeIndexColumnName);
+
+    auto missingInt64Id = nameTable->RegisterName("MissingInt64");
+
+    TString output;
+    TStringOutput outputStream(output);
+    auto writer = CreateWriterForProtobuf(
+        formatNode->Attributes(),
+        {New<TTableSchema>()},
+        nameTable,
+        CreateAsyncAdapter(&outputStream),
+        true,
+        New<TControlAttributesConfig>(),
+        0);
+
+    // The "Message" column carries an embedded message in serialized form.
+    TEmbeddedMessage embeddedMessage;
+    embeddedMessage.set_key("embedded_key");
+    embeddedMessage.set_value("embedded_value");
+    TString embeddedMessageBytes;
+    ASSERT_TRUE(embeddedMessage.SerializeToString(&embeddedMessageBytes));
+
+    // Reference YSON map, reused for the "any" columns and for the final comparison.
+    auto expectedMapNode = BuildYsonNodeFluently()
+        .BeginMap()
+            .Item("Key").Value("Value")
+            .Item("Another")
+            .BeginList()
+                .Item().Value(1)
+                .Item().Value("two")
+            .EndList()
+        .EndMap();
+    auto mapYson = ConvertToYsonString(expectedMapNode).ToString();
+
+    TUnversionedRowBuilder rowBuilder;
+
+    rowBuilder.AddValue(MakeUnversionedDoubleValue(3.14159, doubleId));
+    rowBuilder.AddValue(MakeUnversionedDoubleValue(2.71828, floatId));
+
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-1, int64Id));
+    rowBuilder.AddValue(MakeUnversionedUint64Value(2, uint64Id));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-3, sint64Id));
+    rowBuilder.AddValue(MakeUnversionedUint64Value(4, fixed64Id));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-5, sfixed64Id));
+
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-6, int32Id));
+    rowBuilder.AddValue(MakeUnversionedUint64Value(7, uint32Id));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-8, sint32Id));
+    rowBuilder.AddValue(MakeUnversionedUint64Value(9, fixed32Id));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(-10, sfixed32Id));
+
+    rowBuilder.AddValue(MakeUnversionedBooleanValue(true, boolId));
+    rowBuilder.AddValue(MakeUnversionedStringValue("this_is_string", stringId));
+    rowBuilder.AddValue(MakeUnversionedStringValue("this_is_bytes", bytesId));
+
+    rowBuilder.AddValue(MakeUnversionedStringValue("Two", enumId));
+
+    rowBuilder.AddValue(MakeUnversionedStringValue(embeddedMessageBytes, messageId));
+
+    rowBuilder.AddValue(MakeUnversionedNullValue(missingInt64Id));
+
+    rowBuilder.AddValue(MakeUnversionedInt64Value(12, tableIndexColumnId));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(42, rowIndexColumnId));
+    rowBuilder.AddValue(MakeUnversionedInt64Value(333, rangeIndexColumnId));
+
+    const bool legacyFormat = IsLegacyFormat();
+    if (!legacyFormat) {
+        rowBuilder.AddValue(MakeUnversionedAnyValue(mapYson, anyWithMapId));
+        rowBuilder.AddValue(MakeUnversionedInt64Value(22, anyWithInt64Id));
+        rowBuilder.AddValue(MakeUnversionedStringValue("some_string", anyWithStringId));
+
+        rowBuilder.AddValue(MakeUnversionedInt64Value(-123, otherInt64ColumnId));
+        rowBuilder.AddValue(MakeUnversionedDoubleValue(-123.456, otherDoubleColumnId));
+        rowBuilder.AddValue(MakeUnversionedStringValue("some_string", otherStringColumnId));
+        rowBuilder.AddValue(MakeUnversionedBooleanValue(true, otherBooleanColumnId));
+        rowBuilder.AddValue(MakeUnversionedAnyValue(mapYson, otherAnyColumnId));
+        rowBuilder.AddValue(MakeUnversionedNullValue(otherNullColumnId));
+    }
+
+    // The same row is written rowCount times.
+    auto singleRow = rowBuilder.GetRow();
+    std::vector<TUnversionedRow> allRows(rowCount, singleRow);
+    EXPECT_EQ(true, writer->Write(allRows));
+
+    writer->Close().Get().ThrowOnError();
+
+    TStringInput writtenBytes(output);
+    TLenvalParser lenvalParser(&writtenBytes);
+
+    for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+        auto lenvalEntry = lenvalParser.Next();
+        ASSERT_TRUE(lenvalEntry);
+
+        NYT::TMessage parsedMessage;
+        ASSERT_TRUE(parsedMessage.ParseFromString(lenvalEntry->RowData));
+
+        EXPECT_DOUBLE_EQ(parsedMessage.double_field(), 3.14159);
+        EXPECT_FLOAT_EQ(parsedMessage.float_field(), 2.71828);
+        EXPECT_EQ(parsedMessage.int64_field(), -1);
+        EXPECT_EQ(parsedMessage.uint64_field(), 2u);
+        EXPECT_EQ(parsedMessage.sint64_field(), -3);
+        EXPECT_EQ(parsedMessage.fixed64_field(), 4u);
+        EXPECT_EQ(parsedMessage.sfixed64_field(), -5);
+
+        EXPECT_EQ(parsedMessage.int32_field(), -6);
+        EXPECT_EQ(parsedMessage.uint32_field(), 7u);
+        EXPECT_EQ(parsedMessage.sint32_field(), -8);
+        EXPECT_EQ(parsedMessage.fixed32_field(), 9u);
+        EXPECT_EQ(parsedMessage.sfixed32_field(), -10);
+
+        EXPECT_EQ(parsedMessage.bool_field(), true);
+        EXPECT_EQ(parsedMessage.string_field(), "this_is_string");
+        EXPECT_EQ(parsedMessage.bytes_field(), "this_is_bytes");
+
+        EXPECT_EQ(parsedMessage.enum_field(), EEnum::Two);
+
+        const auto& parsedEmbedded = parsedMessage.message_field();
+        EXPECT_EQ(parsedEmbedded.key(), "embedded_key");
+        EXPECT_EQ(parsedEmbedded.value(), "embedded_value");
+
+        if (legacyFormat) {
+            // The "any" and other columns were not written for the legacy format.
+            continue;
+        }
+
+        EXPECT_TRUE(AreNodesEqual(
+            ConvertToNode(TYsonString(parsedMessage.any_field_with_map())),
+            expectedMapNode));
+        EXPECT_TRUE(AreNodesEqual(
+            ConvertToNode(TYsonString(parsedMessage.any_field_with_int64())),
+            BuildYsonNodeFluently().Value(22)));
+        EXPECT_TRUE(AreNodesEqual(
+            ConvertToNode(TYsonString(parsedMessage.any_field_with_string())),
+            BuildYsonNodeFluently().Value("some_string")));
+
+        auto otherColumnsMap = ConvertToNode(TYsonString(parsedMessage.other_columns_field()))->AsMap();
+        EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<i64>("OtherInt64Column"), -123);
+        EXPECT_DOUBLE_EQ(otherColumnsMap->GetChildValueOrThrow<double>("OtherDoubleColumn"), -123.456);
+        EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<TString>("OtherStringColumn"), "some_string");
+        EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<bool>("OtherBooleanColumn"), true);
+        EXPECT_TRUE(AreNodesEqual(otherColumnsMap->GetChildOrThrow("OtherAnyColumn"), expectedMapNode));
+        EXPECT_EQ(otherColumnsMap->GetChildOrThrow("OtherNullColumn")->GetType(), ENodeType::Entity);
+
+        // The key sets are compared after sorting both sides.
+        auto actualKeys = otherColumnsMap->GetKeys();
+        std::sort(actualKeys.begin(), actualKeys.end());
+        std::vector<std::string> expectedKeys = {
+            "OtherInt64Column",
+            "OtherDoubleColumn",
+            "OtherStringColumn",
+            "OtherBooleanColumn",
+            "OtherAnyColumn",
+            "OtherNullColumn"
+        };
+        std::sort(expectedKeys.begin(), expectedKeys.end());
+        EXPECT_EQ(expectedKeys, actualKeys);
+    }
+
+    // Nothing may follow the expected number of rows.
+    ASSERT_FALSE(lenvalParser.Next());
+}
+
+// Populates the fields of TMessage, feeds the message through ParseRows and
+// checks the column values of each of the rowCount collected rows.
+TEST_P(TProtobufFormatAllFields, Parser)
+{
+ auto [rowCount, protoFormatType] = GetParam();
+
+ auto config = CreateAllFieldsConfig(protoFormatType);
+
+ TMessage message;
+ message.set_double_field(3.14159);
+ message.set_float_field(2.71828);
+
+ message.set_int64_field(-1);
+ message.set_uint64_field(2);
+ message.set_sint64_field(-3);
+ message.set_fixed64_field(4);
+ message.set_sfixed64_field(-5);
+
+ message.set_int32_field(-6);
+ message.set_uint32_field(7);
+ message.set_sint32_field(-8);
+ message.set_fixed32_field(9);
+ message.set_sfixed32_field(-10);
+
+ message.set_bool_field(true);
+ message.set_string_field("this_is_string");
+ message.set_bytes_field("this_is_bytes");
+ message.set_enum_field(EEnum::Three);
+
+ message.mutable_message_field()->set_key("embedded_key");
+ message.mutable_message_field()->set_value("embedded_value");
+
+ // YSON map reused both as the "any" column payload and as OtherAnyColumn.
+ auto mapNode = BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("Key").Value("Value")
+ .Item("Another")
+ .BeginList()
+ .Item().Value(1)
+ .Item().Value("two")
+ .EndList()
+ .EndMap();
+
+ auto otherColumnsNode = BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("OtherInt64Column").Value(-123)
+ .Item("OtherDoubleColumn").Value(-123.456)
+ .Item("OtherStringColumn").Value("some_string")
+ .Item("OtherBooleanColumn").Value(true)
+ .Item("OtherAnyColumn").Value(mapNode)
+ .Item("OtherNullColumn").Entity()
+ .EndMap();
+
+ // The any-typed fields and the other-columns field are filled in only for the non-legacy format.
+ if (!IsLegacyFormat()) {
+ message.set_any_field_with_map(ConvertToYsonString(mapNode).ToString());
+ message.set_any_field_with_int64(BuildYsonStringFluently().Value(22).ToString());
+ message.set_any_field_with_string(BuildYsonStringFluently().Value("some_string").ToString());
+ message.set_other_columns_field(ConvertToYsonString(otherColumnsNode).ToString());
+ }
+
+ auto rowCollector = ParseRows(
+ message,
+ ConvertTo<TProtobufFormatConfigPtr>(config->Attributes().ToMap()),
+ New<TTableSchema>(),
+ rowCount);
+
+ for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
+ // 17 typed columns are checked below for both formats; the non-legacy
+ // format adds 3 any columns and 6 columns from the other-columns map.
+ int expectedSize = IsLegacyFormat() ? 17 : 26;
+ ASSERT_EQ(static_cast<int>(rowCollector.GetRow(rowIndex).GetCount()), expectedSize);
+
+ ASSERT_DOUBLE_EQ(GetDouble(rowCollector.GetRowValue(rowIndex, "Double")), 3.14159);
+ // The float column is read back as a double, hence the explicit tolerance.
+ ASSERT_NEAR(GetDouble(rowCollector.GetRowValue(rowIndex, "Float")), 2.71828, 1e-5);
+
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Int64")), -1);
+ ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "UInt64")), 2u);
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SInt64")), -3);
+ ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "Fixed64")), 4u);
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SFixed64")), -5);
+
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Int32")), -6);
+ ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "UInt32")), 7u);
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SInt32")), -8);
+ ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "Fixed32")), 9u);
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SFixed32")), -10);
+
+ ASSERT_EQ(GetBoolean(rowCollector.GetRowValue(rowIndex, "Bool")), true);
+ ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "String")), "this_is_string");
+ ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "Bytes")), "this_is_bytes");
+
+ // The enum arrives as its integer value in the legacy format and as its name otherwise.
+ if (IsLegacyFormat()) {
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Enum")), 3);
+ } else {
+ ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "Enum")), "Three");
+ }
+
+ // The embedded message column holds serialized protobuf bytes, so it is parsed back here.
+ TEmbeddedMessage embeddedMessage;
+ ASSERT_TRUE(embeddedMessage.ParseFromString(GetString(rowCollector.GetRowValue(rowIndex, "Message"))));
+ ASSERT_EQ(embeddedMessage.key(), "embedded_key");
+ ASSERT_EQ(embeddedMessage.value(), "embedded_value");
+
+ if (!IsLegacyFormat()) {
+ ASSERT_TRUE(AreNodesEqual(GetAny(rowCollector.GetRowValue(rowIndex, "AnyWithMap")), mapNode));
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "AnyWithInt64")), 22);
+ ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "AnyWithString")), "some_string");
+
+ ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "OtherInt64Column")), -123);
+ ASSERT_DOUBLE_EQ(GetDouble(rowCollector.GetRowValue(rowIndex, "OtherDoubleColumn")), -123.456);
+ ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "OtherStringColumn")), "some_string");
+ ASSERT_EQ(GetBoolean(rowCollector.GetRowValue(rowIndex, "OtherBooleanColumn")), true);
+ ASSERT_TRUE(AreNodesEqual(GetAny(rowCollector.GetRowValue(rowIndex, "OtherAnyColumn")), mapNode));
+ ASSERT_EQ(rowCollector.GetRowValue(rowIndex, "OtherNullColumn").Type, EValueType::Null);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Fixture for schema/config compatibility tests: provides a sequence of
+// progressively extended table schemas (early -> middle -> late) and two
+// protobuf format configs describing intermediate versions.
+class TProtobufFormatCompat
+ : public ::testing::Test
+{
+public:
+ // Earliest schema: column "a" is an optional variant with the single alternative f1 (int64).
+ static TTableSchemaPtr GetEarlySchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", OptionalLogicalType(VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ }))},
+ });
+ return schema;
+ }
+
+ // Adds alternative f2 (string) to "a" and a new optional struct column "b" with field x.
+ static TTableSchemaPtr GetFirstMiddleSchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", OptionalLogicalType(VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ }))},
+ {"b", OptionalLogicalType(StructLogicalType({
+ {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ }))},
+ });
+ return schema;
+ }
+
+ // Same as the first middle schema plus an optional string field y in struct "b".
+ static TTableSchemaPtr GetSecondMiddleSchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", OptionalLogicalType(VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ }))},
+ {"b", OptionalLogicalType(StructLogicalType({
+ {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ }))},
+ });
+ return schema;
+ }
+
+ // Same as the second middle schema plus an optional string field z in struct "b".
+ static TTableSchemaPtr GetThirdMiddleSchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", OptionalLogicalType(VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ }))},
+ {"b", OptionalLogicalType(StructLogicalType({
+ {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"z", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ }))},
+ });
+ return schema;
+ }
+
+ // Latest schema: adds alternative f3 (boolean) to "a" and a new column "c"
+ // (optional list of booleans) placed between "a" and "b".
+ static TTableSchemaPtr GetLateSchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", OptionalLogicalType(VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"f3", SimpleLogicalType(ESimpleLogicalValueType::Boolean)},
+ }))},
+ {"c", OptionalLogicalType(ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean)))},
+ {"b", OptionalLogicalType(StructLogicalType({
+ {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"z", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ }))},
+ });
+ return schema;
+ }
+
+ // Config describing oneof "a" with only f1 (field 1) and structured message
+ // "b" (field 2) with only x (field 1). Note that, unlike GetFirstMiddleSchema,
+ // it does not list f2.
+ static TProtobufFormatConfigPtr GetFirstMiddleConfig()
+ {
+ static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
+ .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("a")
+ .Item("field_number").Value(0)
+ .Item("proto_type").Value("oneof")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("f1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("b")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields")
+ .BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("x")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList().EndMap().EndList().EndMap());
+ return config;
+ }
+
+ // Config describing oneof "a" with f1 (field 1) and f2 (field 101), and
+ // structured message "b" (field 2) with x (field 1) and y (field 2).
+ static TProtobufFormatConfigPtr GetSecondMiddleConfig()
+ {
+ static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
+ .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("a")
+ .Item("field_number").Value(0)
+ .Item("proto_type").Value("oneof")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("f1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("f2")
+ .Item("field_number").Value(101)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("b")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields")
+ .BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("x")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("y")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList().EndMap().EndList().EndMap());
+ return config;
+ }
+};
+
+// Writes a single row through the protobuf writer into an in-memory buffer and
+// parses the lenval-framed output back into a TMessage.
+// Throws if the output holds no entry, more than one entry, or an entry that
+// cannot be parsed as TMessage.
+template <typename TMessage>
+TMessage WriteRow(
+    TUnversionedRow row,
+    const TProtobufFormatConfigPtr& config,
+    const TTableSchemaPtr& schema,
+    const TNameTablePtr& nameTable)
+{
+    TString serialized;
+    TStringOutput sink(serialized);
+
+    auto protobufWriter = CreateWriterForProtobuf(
+        config,
+        {schema},
+        nameTable,
+        CreateAsyncAdapter(&sink),
+        true,
+        New<TControlAttributesConfig>(),
+        0);
+
+    // The return value of Write is intentionally discarded.
+    std::vector<TUnversionedRow> singleRow{row};
+    Y_UNUSED(protobufWriter->Write(singleRow));
+    protobufWriter->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput source(serialized);
+    TLenvalParser parser(&source);
+
+    auto firstEntry = parser.Next();
+    if (!firstEntry) {
+        THROW_ERROR_EXCEPTION("Unexpected end of stream in lenval parser");
+    }
+
+    TMessage parsed;
+    const bool parsedOk = parsed.ParseFromString(firstEntry->RowData);
+    if (!parsedOk) {
+        THROW_ERROR_EXCEPTION("Failed to parse message");
+    }
+
+    // Exactly one entry is expected in the stream.
+    if (parser.Next()) {
+        THROW_ERROR_EXCEPTION("Unexpected entry in lenval parser");
+    }
+    return parsed;
+}
+
+// Writes rows shaped for each schema version using one fixed format config
+// (the second-middle one) and checks the resulting TCompatMessage.
+TEST_F(TProtobufFormatCompat, Write)
+{
+ // The name table comes from the late schema, so it knows columns "a", "c" and "b".
+ auto nameTable = TNameTable::FromSchema(*GetLateSchema());
+ auto config = GetSecondMiddleConfig();
+
+ auto writeRow = [&] (TUnversionedRow row, const TTableSchemaPtr& schema) {
+ return WriteRow<NYT::TCompatMessage>(row, config, schema, nameTable);
+ };
+
+ {
+ // Variant alternative 0 (f1) only; column "b" is absent from the row.
+ auto earlyRow = MakeRow(nameTable, {
+ {"a", EValueType::Composite, "[0; -24]"}
+ });
+
+ SCOPED_TRACE("early");
+ auto message = writeRow(earlyRow, GetEarlySchema());
+ EXPECT_EQ(message.f1(), -24);
+ EXPECT_FALSE(message.has_f2());
+ EXPECT_EQ(message.has_b(), false);
+ }
+ {
+ auto firstMiddleRow = MakeRow(nameTable, {
+ {"a", EValueType::Composite, "[1; foobar]"},
+ {"b", EValueType::Composite, "[foo]"},
+ });
+
+ SCOPED_TRACE("firstMiddle");
+ auto message = writeRow(firstMiddleRow, GetFirstMiddleSchema());
+ EXPECT_FALSE(message.has_f1());
+ EXPECT_EQ(message.f2(), "foobar");
+ EXPECT_EQ(message.b().x(), "foo");
+ EXPECT_EQ(message.b().has_y(), false);
+ }
+ {
+ auto secondMiddleRow = MakeRow(nameTable, {
+ {"a", EValueType::Composite, "[1; foobar]"},
+ {"b", EValueType::Composite, "[foo; bar]"},
+ });
+
+ SCOPED_TRACE("secondMiddle");
+ auto message = writeRow(secondMiddleRow, GetSecondMiddleSchema());
+ EXPECT_FALSE(message.has_f1());
+ EXPECT_EQ(message.f2(), "foobar");
+ EXPECT_EQ(message.b().x(), "foo");
+ EXPECT_EQ(message.b().y(), "bar");
+ }
+ {
+ // The row carries a third struct field ("spam") that the config does
+ // not describe; only x and y are checked in the output.
+ auto thirdMiddleRow = MakeRow(nameTable, {
+ {"a", EValueType::Composite, "[1; foobar]"},
+ {"b", EValueType::Composite, "[foo; bar; spam]"},
+ });
+
+ SCOPED_TRACE("thirdMiddle");
+ auto message = writeRow(thirdMiddleRow, GetThirdMiddleSchema());
+ EXPECT_FALSE(message.has_f1());
+ EXPECT_EQ(message.f2(), "foobar");
+ EXPECT_EQ(message.b().x(), "foo");
+ EXPECT_EQ(message.b().y(), "bar");
+ }
+ {
+ // Variant alternative 2 (f3) and column "c" exist only in the late
+ // schema; the config lists neither, and neither f1 nor f2 is expected to be set.
+ auto lateRow = MakeRow(nameTable, {
+ {"a", EValueType::Composite, "[2; %true]"},
+ {"c", EValueType::Composite, "[%false; %true; %false]"},
+ {"b", EValueType::Composite, "[foo; bar; spam]"},
+ });
+
+ SCOPED_TRACE("late");
+ auto message = writeRow(lateRow, GetLateSchema());
+ EXPECT_FALSE(message.has_f1());
+ EXPECT_FALSE(message.has_f2());
+ EXPECT_EQ(message.b().x(), "foo");
+ EXPECT_EQ(message.b().y(), "bar");
+ }
+}
+
+// Parses one TCompatMessage (f2, b.x and b.y set) with the second-middle config
+// against every schema version and checks which columns appear in the result.
+TEST_F(TProtobufFormatCompat, Parse)
+{
+ auto config = GetSecondMiddleConfig();
+
+ NYT::TCompatMessage message;
+ message.set_f2("Sandiego");
+ message.mutable_b()->set_x("foo");
+ message.mutable_b()->set_y("bar");
+
+ {
+ // The early schema has neither alternative f2 nor columns "b"/"c".
+ SCOPED_TRACE("early");
+ auto collector = ParseRows(message, config, GetEarlySchema());
+ EXPECT_FALSE(collector.FindRowValue(0, "a"));
+ EXPECT_FALSE(collector.GetNameTable()->FindId("b"));
+ EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
+ }
+ {
+ // Struct "b" has only field x in this schema, so y is not part of the composite.
+ SCOPED_TRACE("firstMiddle");
+ auto collector = ParseRows(message, config, GetFirstMiddleSchema());
+ EXPECT_NODES_EQUAL(
+ GetComposite(collector.GetRowValue(0, "a")),
+ ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
+ EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo]"))));
+ EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
+ }
+ {
+ SCOPED_TRACE("secondMiddle");
+ auto collector = ParseRows(message, config, GetSecondMiddleSchema());
+ EXPECT_NODES_EQUAL(
+ GetComposite(collector.GetRowValue(0, "a")),
+ ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
+ EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar]"))));
+ EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
+ }
+ {
+ // Field z exists in the schema but not in the config or message; it is expected as "#".
+ SCOPED_TRACE("thirdMiddle");
+ auto collector = ParseRows(message, config, GetThirdMiddleSchema());
+ EXPECT_NODES_EQUAL(
+ GetComposite(collector.GetRowValue(0, "a")),
+ ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
+ EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar;#]"))));
+ EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
+ }
+ {
+ // With the late schema, column "c" is expected to be registered in the name table.
+ SCOPED_TRACE("late");
+ auto collector = ParseRows(message, config, GetLateSchema());
+ EXPECT_NODES_EQUAL(
+ GetComposite(collector.GetRowValue(0, "a")),
+ ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
+ EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar;#]"))));
+ EXPECT_TRUE(collector.GetNameTable()->FindId("c"));
+ }
+}
+
+// Parsing must fail with "Unexpected field number 2" when the message sets
+// b.y while the first-middle config lists only field x for "b".
+TEST_F(TProtobufFormatCompat, ParseWrong)
+{
+    NYT::TCompatMessage unexpectedMessage;
+    unexpectedMessage.set_f1(42);
+
+    auto* embedded = unexpectedMessage.mutable_b();
+    embedded->set_x("foo");
+    embedded->set_y("bar");
+
+    EXPECT_THROW_WITH_SUBSTRING(
+        ParseRows(unexpectedMessage, GetFirstMiddleConfig(), GetFirstMiddleSchema()),
+        "Unexpected field number 2");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Fixture for enum compatibility tests: a schema whose enum columns are stored
+// as strings, and a protobuf config where every enum field uses the
+// "skip_unknown_values" writing mode.
+class TProtobufFormatEnumCompat
+ : public ::testing::Test
+{
+public:
+ // Four enum-like string columns (optional, required, repeated, packed repeated)
+ // plus an optional struct "inner" repeating the same four fields.
+ static TTableSchemaPtr CreateTableSchema()
+ {
+ static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"optional_enum", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"required_enum", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"packed_repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"inner", OptionalLogicalType(StructLogicalType({
+ {"optional_enum", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"required_enum", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ {"packed_repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
+ }))},
+ });
+ return schema;
+ }
+ // Declares enumeration ECompatEnum {One, Two, Three} and maps each column
+ // (fields 1-4 and the nested message at field 100) to an "enum_string" field.
+ static TProtobufFormatConfigPtr CreateProtobufFormatConfig()
+ {
+ static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("enumerations").BeginMap()
+ .Item("ECompatEnum")
+ .BeginMap()
+ .Item("One").Value(1)
+ .Item("Two").Value(2)
+ .Item("Three").Value(3)
+ .EndMap()
+ .EndMap()
+ .Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("optional_enum")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("enum_string")
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("required_enum")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("enum_string")
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("repeated_enum")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("enum_string")
+ .Item("repeated").Value(true)
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("packed_repeated_enum")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("enum_string")
+ .Item("repeated").Value(true)
+ .Item("packed").Value(true)
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("inner")
+ .Item("field_number").Value(100)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("optional_enum")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("enum_string")
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("required_enum")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("enum_string")
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("repeated_enum")
+ .Item("field_number").Value(3)
+ .Item("proto_type").Value("enum_string")
+ .Item("repeated").Value(true)
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("packed_repeated_enum")
+ .Item("field_number").Value(4)
+ .Item("proto_type").Value("enum_string")
+ .Item("repeated").Value(true)
+ .Item("packed").Value(true)
+ .Item("enum_writing_mode").Value("skip_unknown_values")
+ .Item("enumeration_name").Value("ECompatEnum")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList().EndMap().EndList().EndMap());
+ return config;
+ }
+
+};
+
+// Enum names missing from ECompatEnum ("MinusFortyTwo") must be dropped from
+// optional and repeated fields, at the top level and inside `inner`.
+TEST_F(TProtobufFormatEnumCompat, WriteCanSkipUnknownEnumValues)
+{
+    using TEnumVector = std::vector<TEnumCompat::ECompatEnum>;
+
+    auto tableSchema = CreateTableSchema();
+    auto formatConfig = CreateProtobufFormatConfig();
+    auto names = TNameTable::FromSchema(*tableSchema);
+
+    auto inputRow = MakeRow(names, {
+        {"optional_enum", "MinusFortyTwo"},
+        {"required_enum", "One"},
+        {"repeated_enum", EValueType::Composite, "[MinusFortyTwo;One;MinusFortyTwo]"},
+        {"packed_repeated_enum", EValueType::Composite, "[MinusFortyTwo;Two;MinusFortyTwo]"},
+        {"inner", EValueType::Composite, "[MinusFortyTwo;Two;[MinusFortyTwo;Two];[One;MinusFortyTwo]]"},
+    });
+
+    // Copies a repeated enum field into a plain vector so it can be compared with EXPECT_EQ.
+    auto toEnumVector = [] (const auto& repeatedField) {
+        TEnumVector collected;
+        for (auto item : repeatedField) {
+            collected.push_back(static_cast<TEnumCompat::ECompatEnum>(item));
+        }
+        return collected;
+    };
+
+    auto written = WriteRow<TEnumCompat>(inputRow, formatConfig, tableSchema, names);
+
+    EXPECT_FALSE(written.has_optional_enum());
+    EXPECT_EQ(written.required_enum(), TEnumCompat::One);
+    EXPECT_EQ(toEnumVector(written.repeated_enum()), TEnumVector{TEnumCompat::One});
+    EXPECT_EQ(toEnumVector(written.packed_repeated_enum()), TEnumVector{TEnumCompat::Two});
+
+    ASSERT_TRUE(written.has_inner());
+    const auto& inner = written.inner();
+    EXPECT_FALSE(inner.has_optional_enum());
+    EXPECT_EQ(inner.required_enum(), TEnumCompat::Two);
+    EXPECT_EQ(toEnumVector(inner.repeated_enum()), TEnumVector{TEnumCompat::Two});
+    EXPECT_EQ(toEnumVector(inner.packed_repeated_enum()), TEnumVector{TEnumCompat::One});
+}
+
+TEST_F(TProtobufFormatEnumCompat, WriteDoesntSkipRequiredFields)
+{
+ auto schema = CreateTableSchema();
+ auto config = CreateProtobufFormatConfig();
+
+ auto nameTable = TNameTable::FromSchema(*schema);
+
+ {
+ auto row = MakeRow(nameTable, {{"required_enum", "MinusFortyTwo"}});
+ EXPECT_THROW_WITH_SUBSTRING(WriteRow<TEnumCompat>(row, config, schema, nameTable), "Invalid value for enum");
+ }
+ {
+ auto row = MakeRow(nameTable, {{"inner", EValueType::Composite, "[#;MinusFortyTwo;#;#]"},});
+ EXPECT_THROW_WITH_SUBSTRING(WriteRow<TEnumCompat>(row, config, schema, nameTable), "Invalid value for enum");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Fixture for tests of errors detected while parsing: provides schemas with a
+// single column "a" (variant or struct, optionally wrapped in optional) and
+// matching protobuf format configs.
+class TProtobufFormatRuntimeErrors
+ : public ::testing::Test
+{
+public:
+ // Schema with column "a" being a variant {f1: int64, f2: string};
+ // the variant is wrapped into optional when `optional` is true.
+ static TTableSchemaPtr GetSchemaWithVariant(bool optional = false)
+ {
+ auto variantType = VariantStructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ });
+ return New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", optional ? OptionalLogicalType(variantType) : variantType},
+ });
+ }
+
+ // Schema with column "a" being a struct {f1: int64, f2: string};
+ // the struct is wrapped into optional when `optional` is true.
+ static TTableSchemaPtr GetSchemaWithStruct(bool optional = false)
+ {
+ auto structType = StructLogicalType({
+ {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ });
+ return New<TTableSchema>(std::vector<TColumnSchema>{
+ {"a", optional ? OptionalLogicalType(structType) : structType},
+ });
+ }
+
+ // Config mapping column "a" to a oneof with alternatives f1 (field 1) and
+ // f2 (field 2). No field_number is given for the oneof entry itself.
+ static TProtobufFormatConfigPtr GetConfigWithVariant()
+ {
+ static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
+ .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("a")
+ .Item("proto_type").Value("oneof")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("f1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("f2")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList().EndMap().EndList().EndMap());
+ return config;
+ }
+
+ // Config mapping column "a" to a structured message (field 1) with
+ // f1 (field 1, int64) and f2 (field 2, string).
+ static TProtobufFormatConfigPtr GetConfigWithStruct()
+ {
+ static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
+ .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("a")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("structured_message")
+ .Item("fields").BeginList()
+ .Item().BeginMap()
+ .Item("name").Value("f1")
+ .Item("field_number").Value(1)
+ .Item("proto_type").Value("int64")
+ .EndMap()
+ .Item().BeginMap()
+ .Item("name").Value("f2")
+ .Item("field_number").Value(2)
+ .Item("proto_type").Value("string")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList().EndMap().EndList().EndMap());
+ return config;
+ }
+};
+
+// Checks parsing of a oneof-backed variant column when no alternative or
+// several alternatives are present in the message.
+TEST_F(TProtobufFormatRuntimeErrors, ParseVariant)
+{
+ {
+ // An empty message is fine when the variant column is optional: no value is produced.
+ SCOPED_TRACE("Optional variant, all missing");
+ TMessageWithOneof message;
+ auto collector = ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant(/* optional */ true));
+ EXPECT_FALSE(collector.FindRowValue(0, "a"));
+ }
+ {
+ SCOPED_TRACE("All missing");
+ TMessageWithOneof message;
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant()),
+ "required field \"<root>.a\" is missing");
+ }
+ {
+ // TMessageWithStruct::TStruct declares f1 and f2 with the same field
+ // numbers (1 and 2) as the oneof alternatives; it is presumably used here
+ // because a real oneof cannot have both alternatives set at once.
+ SCOPED_TRACE("two alternatives");
+ TMessageWithStruct::TStruct message;
+ message.set_f1(5);
+ message.set_f2("boo");
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant()),
+ "multiple entries for oneof field \"<root>.a\"");
+ }
+}
+
+// Checks parsing of a struct column backed by a structured message: missing
+// submessage, missing inner fields, and the fully populated case.
+TEST_F(TProtobufFormatRuntimeErrors, ParseStruct)
+{
+ {
+ SCOPED_TRACE("Optional submessage missing");
+ TMessageWithStruct message;
+ auto collector = ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct(/* optional */ true));
+ EXPECT_FALSE(collector.FindRowValue(0, "a"));
+ }
+ {
+ SCOPED_TRACE("Required submessage missing");
+ TMessageWithStruct message;
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
+ "required field \"<root>.a\" is missing");
+ }
+ {
+ SCOPED_TRACE("All fields missing");
+ TMessageWithStruct message;
+ // mutable_a() creates the submessage without setting any of its fields.
+ message.mutable_a();
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
+ "required field \"<root>.a.f1\" is missing");
+ }
+ {
+ SCOPED_TRACE("Second field missing");
+ TMessageWithStruct message;
+ message.mutable_a()->set_f1(17);
+ EXPECT_THROW_WITH_SUBSTRING(
+ ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
+ "required field \"<root>.a.f2\" is missing");
+ }
+ {
+ // Both fields set: the struct is expected as the composite [f1; f2].
+ SCOPED_TRACE("All present");
+ TMessageWithStruct message;
+ message.mutable_a()->set_f1(17);
+ message.mutable_a()->set_f2("foobar");
+ auto collector = ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct());
+ EXPECT_NODES_EQUAL(
+ GetComposite(collector.GetRowValue(0, "a")),
+ ConvertToNode(TYsonString(TStringBuf("[17;foobar]"))));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/yt/yt/library/formats/unittests/protobuf_format_ut.proto b/yt/yt/library/formats/unittests/protobuf_format_ut.proto
new file mode 100644
index 0000000000..06258de619
--- /dev/null
+++ b/yt/yt/library/formats/unittests/protobuf_format_ut.proto
@@ -0,0 +1,255 @@
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NProtobufFormatTest;
+
+// Enum shared by the test messages; besides small positive values it contains
+// a negative value and both int32 boundary values.
+enum EEnum
+{
+ One = 1;
+ Two = 2;
+ Three = 3;
+
+ MinusFortyTwo = -42;
+
+ MinInt32 = -2147483648;
+ MaxInt32 = 2147483647;
+}
+
+// Small struct with one float and one string field; used as the type of
+// TEmbedded2Message.embedded2_struct.
+message TEmbeddedStruct {
+ optional float float1 = 1;
+ optional string string1 = 2;
+};
+
+// Innermost message of the embedding chain; it is referenced from
+// TEmbedded1Message.t2, which carries the EMBEDDED flag.
+message TEmbedded2Message {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional uint64 embedded2_num = 10;
+ optional TEmbeddedStruct embedded2_struct = 17;
+ repeated string embedded2_repeated = 42;
+};
+
+// Middle message of the embedding chain: embeds TEmbedded2Message via the
+// EMBEDDED flag and adds a oneof plus two plain fields of its own.
+message TEmbedded1Message {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED];
+ oneof variant {
+ string str_variant = 101;
+ uint64 uint_variant = 102;
+ }
+ // Field number 10 is also used by TEmbedded2Message.embedded2_num.
+ optional uint64 embedded_num = 10; // make intentional field_num collision!
+ optional string embedded_extra_field = 11;
+};
+// Outermost message of the embedding chain: embeds TEmbedded1Message via the
+// EMBEDDED flag and also carries an OTHER_COLUMNS field.
+message TEmbeddingMessage {
+ optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
+ optional TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED];
+ optional uint64 num = 12;
+ optional string extra_field = 13;
+};
+
+// Simple key/value pair used as a nested message type by several test messages.
+message TEmbeddedMessage
+{
+ optional string key = 1;
+ optional string value = 2;
+}
+
+// Message with SERIALIZATION_YT default flags that combines nested messages,
+// repeated and packed fields, enums, ANY/OTHER_COLUMNS bytes fields, oneofs
+// and a map field.
+message TMessageWithStructuredEmbedded
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ // Nested message mirroring most field kinds of the outer message.
+ message TFirstMessage
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
+ optional int64 int64_field = 2;
+ repeated int64 repeated_int64_field = 3;
+ optional TEmbeddedMessage message_field = 4;
+ repeated TEmbeddedMessage repeated_message_field = 5;
+ optional bytes any_int64_field = 6 [(NYT.flags) = ANY];
+ optional bytes any_map_field = 7 [(NYT.flags) = ANY];
+ optional int64 optional_int64_field = 8;
+ repeated int64 another_repeated_int64_field = 9;
+ repeated bytes repeated_optional_any_field = 10 [(NYT.flags) = ANY];
+ repeated EEnum packed_repeated_enum_field = 11 [packed=true, (NYT.flags) = ENUM_STRING];
+ repeated bool optional_repeated_bool_field = 12;
+ oneof oneof_field {
+ string oneof_string_field_1 = 101;
+ string oneof_string_field = 102;
+ TEmbeddedMessage oneof_message_field = 1000;
+ }
+ oneof optional_oneof_field {
+ string optional_oneof_string_field_1 = 201;
+ string optional_oneof_string_field = 202;
+ TEmbeddedMessage optional_oneof_message_field = 2000;
+ }
+ map<int64, TEmbeddedMessage> map_field = 13 [(NYT.flags) = MAP_AS_DICT];
+ }
+
+ // Nested message whose field numbers are sparse and not in declaration order.
+ message TSecondMessage
+ {
+ optional int64 one = 2;
+ optional int64 two = 500000000;
+ optional int64 three = 100500;
+ }
+
+ optional TFirstMessage first = 1;
+ optional TSecondMessage second = 2;
+ repeated TEmbeddedMessage repeated_message_field = 3;
+ repeated int64 repeated_int64_field = 4;
+ optional int64 int64_any_field = 5 [(NYT.column_name) = "any_field"];
+
+ // The column names below are crossed with the proto field widths
+ // (e.g. the int32 field maps to column "int64_field").
+ optional int32 int32_field = 6 [(NYT.column_name) = "int64_field"];
+ optional uint32 uint32_field = 7 [(NYT.column_name) = "uint64_field"];
+ optional int64 int64_field = 8 [(NYT.column_name) = "int32_field"];
+ optional uint64 uint64_field = 9 [(NYT.column_name) = "uint32_field"];
+
+ optional EEnum enum_int_field = 10 [(NYT.flags) = ENUM_INT];
+ optional EEnum enum_string_string_field = 11 [(NYT.flags) = ENUM_STRING];
+ optional EEnum enum_string_int64_field = 12 [(NYT.flags) = ENUM_STRING];
+
+
+ repeated int64 another_repeated_int64_field = 13;
+
+ repeated bytes repeated_optional_any_field = 14 [(NYT.flags) = ANY];
+
+ optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
+
+ optional string utf8_field = 16;
+
+ repeated int64 packed_repeated_int64_field = 17 [packed=true];
+
+ repeated int64 optional_repeated_int64_field = 18;
+
+ oneof oneof_field {
+ string oneof_string_field_1 = 101;
+ string oneof_string_field = 102;
+ TEmbeddedMessage oneof_message_field = 1000;
+ }
+
+ oneof optional_oneof_field {
+ string optional_oneof_string_field_1 = 201;
+ string optional_oneof_string_field = 202;
+ TEmbeddedMessage optional_oneof_message_field = 2000;
+ }
+
+ map<int64, TEmbeddedMessage> map_field = 19 [(NYT.flags) = MAP_AS_DICT];
+}
+
+// First of three messages named for multi-table tests: has a nested message,
+// a repeated field and an int64 field mapped to column "any_field".
+message TSeveralTablesMessageFirst
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TEmbedded
+ {
+ optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
+ optional int64 int64_field = 2;
+ }
+ optional TEmbedded embedded = 1;
+ repeated int64 repeated_int64_field = 2;
+ optional int64 int64_field = 3 [(NYT.column_name) = "any_field"];
+}
+
+// Second multi-table message: a string-serialized enum and an int64 field.
+message TSeveralTablesMessageSecond
+{
+ optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
+ optional int64 int64_field = 2;
+}
+
+// Third multi-table message: a single string field.
+message TSeveralTablesMessageThird
+{
+ optional string string_field = 1;
+}
+
+// Message with one field of each scalar protobuf type plus enum, nested
+// message, ANY and OTHER_COLUMNS fields; every field except
+// other_columns_field is mapped to an explicitly named column.
+message TMessage
+{
+ optional double double_field = 1 [(NYT.column_name) = "Double"];
+ optional float float_field = 2 [(NYT.column_name) = "Float"];
+
+ optional int64 int64_field = 3 [(NYT.column_name) = "Int64"];
+ optional uint64 uint64_field = 4 [(NYT.column_name) = "UInt64"];
+ optional sint64 sint64_field = 5 [(NYT.column_name) = "SInt64"];
+ optional fixed64 fixed64_field = 6 [(NYT.column_name) = "Fixed64"];
+ optional sfixed64 sfixed64_field = 7 [(NYT.column_name) = "SFixed64"];
+
+ optional int32 int32_field = 8 [(NYT.column_name) = "Int32"];
+ optional uint32 uint32_field = 9 [(NYT.column_name) = "UInt32"];
+ optional sint32 sint32_field = 10 [(NYT.column_name) = "SInt32"];
+ optional fixed32 fixed32_field = 11 [(NYT.column_name) = "Fixed32"];
+ optional sfixed32 sfixed32_field = 12 [(NYT.column_name) = "SFixed32"];
+
+ optional bool bool_field = 13 [(NYT.column_name) = "Bool"];
+ optional string string_field = 14 [(NYT.column_name) = "String"];
+ optional bytes bytes_field = 15 [(NYT.column_name) = "Bytes"];
+
+ optional EEnum enum_field = 16 [(NYT.column_name) = "Enum", (NYT.flags) = ENUM_STRING];
+ optional TEmbeddedMessage message_field = 17 [(NYT.column_name) = "Message"];
+
+ optional bytes any_field_with_map = 18 [(NYT.column_name) = "AnyWithMap", (NYT.flags) = ANY];
+ optional bytes any_field_with_int64 = 19 [(NYT.column_name) = "AnyWithInt64", (NYT.flags) = ANY];
+ optional bytes any_field_with_string = 20 [(NYT.column_name) = "AnyWithString", (NYT.flags) = ANY];
+ optional bytes other_columns_field = 21 [(NYT.flags) = OTHER_COLUMNS];
+
+ // NOTE(review): judging by its name this field is meant to stay unset in tests — confirm.
+ optional int64 missing_int64_field = 22 [(NYT.column_name) = "MissingInt64"];
+}
+
+// Message used by the TProtobufFormatCompat tests: oneof "a" with f1 (field 1)
+// and f2 (field 101), and an embedded message "b" (field 2) with x and y.
+message TCompatMessage
+{
+ message TEmbedded
+ {
+ optional string x = 1;
+ optional string y = 2;
+ }
+
+ oneof a {
+ int64 f1 = 1;
+ string f2 = 101;
+ }
+ optional TEmbedded b = 2;
+}
+
+// Message consisting of a single oneof with an int64 and a string alternative.
+message TMessageWithOneof
+{
+ oneof variant {
+ int64 f1 = 1;
+ string f2 = 2;
+ }
+}
+
+// Message with a single optional submessage "a"; TStruct uses the same field
+// names and numbers (f1 = 1, f2 = 2) as the oneof in TMessageWithOneof.
+message TMessageWithStruct
+{
+ message TStruct
+ {
+ optional int64 f1 = 1;
+ optional string f2 = 2;
+ }
+ optional TStruct a = 1;
+}
+
+// Message whose only field is an OTHER_COLUMNS bytes field.
+message TOtherColumnsMessage
+{
+ optional bytes other_columns_field = 1 [(NYT.flags) = OTHER_COLUMNS];
+}
+
+// Message used by the TProtobufFormatEnumCompat tests. It sets both
+// SERIALIZATION_YT and ENUM_SKIP_UNKNOWN_VALUES as default field flags and
+// declares optional, required, repeated and packed repeated enum fields at the
+// top level and inside the nested TStruct.
+message TEnumCompat {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ option (NYT.default_field_flags) = ENUM_SKIP_UNKNOWN_VALUES;
+
+ // Contains only One/Two/Three, so names such as "MinusFortyTwo" are unknown to it.
+ enum ECompatEnum {
+ One = 1;
+ Two = 2;
+ Three = 3;
+ }
+
+
+ // Same four enum fields as the enclosing message.
+ message TStruct
+ {
+ optional ECompatEnum optional_enum = 1;
+ required ECompatEnum required_enum = 2;
+ repeated ECompatEnum repeated_enum = 3;
+ repeated ECompatEnum packed_repeated_enum = 4 [packed=true, (NYT.flags) = ENUM_STRING];
+ }
+
+ optional ECompatEnum optional_enum = 1;
+ required ECompatEnum required_enum = 2;
+ repeated ECompatEnum repeated_enum = 3;
+ repeated ECompatEnum packed_repeated_enum = 4 [packed=true, (NYT.flags) = ENUM_STRING];
+
+ optional TStruct inner = 100;
+}
diff --git a/yt/yt/library/formats/unittests/row_helpers.cpp b/yt/yt/library/formats/unittests/row_helpers.cpp
new file mode 100644
index 0000000000..61a89d1669
--- /dev/null
+++ b/yt/yt/library/formats/unittests/row_helpers.cpp
@@ -0,0 +1,70 @@
+#include "row_helpers.h"
+
+#include <yt/yt/core/yson/string.h>
+#include <yt/yt/core/ytree/convert.h>
+
+namespace NYT {
+
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+static void EnsureTypesMatch(EValueType expected, EValueType actual)
+{
+ if (expected != actual) {
+ THROW_ERROR_EXCEPTION("Unexpected type of TUnversionedValue: expected %Qlv, actual %Qlv",
+ expected,
+ actual);
+ }
+}
+
+// Returns the Int64 payload of |value| after checking its type tag.
+i64 GetInt64(const TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Int64, value.Type);
+    const auto payload = value.Data.Int64;
+    return payload;
+}
+
+// Returns the Uint64 payload of |value| after checking its type tag.
+ui64 GetUint64(const TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Uint64, value.Type);
+    const auto payload = value.Data.Uint64;
+    return payload;
+}
+
+// Returns the Double payload of |value| after checking its type tag.
+double GetDouble(const NTableClient::TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Double, value.Type);
+    const auto payload = value.Data.Double;
+    return payload;
+}
+
+// Returns the Boolean payload of |value| after checking its type tag.
+bool GetBoolean(const TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Boolean, value.Type);
+    const auto payload = value.Data.Boolean;
+    return payload;
+}
+
+// Returns the string payload of |value| (via AsString()) after checking its type tag.
+TString GetString(const TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::String, value.Type);
+    TString payload = value.AsString();
+    return payload;
+}
+
+// Checks that |value| is tagged Any, then converts its string payload, wrapped
+// in a TYsonString, to a node.
+NYTree::INodePtr GetAny(const NTableClient::TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Any, value.Type);
+    const NYson::TYsonString yson(value.AsString());
+    return NYTree::ConvertToNode(yson);
+}
+
+// Checks that |value| is tagged Composite, then converts its string payload, wrapped
+// in a TYsonString, to a node.
+NYTree::INodePtr GetComposite(const NTableClient::TUnversionedValue& value)
+{
+    EnsureTypesMatch(EValueType::Composite, value.Type);
+    const NYson::TYsonString yson(value.AsString());
+    return NYTree::ConvertToNode(yson);
+}
+
+// Tells whether |value| carries the Null type tag; never throws on other types.
+bool IsNull(const NTableClient::TUnversionedValue& value)
+{
+    const bool isNull = (value.Type == EValueType::Null);
+    return isNull;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/library/formats/unittests/row_helpers.h b/yt/yt/library/formats/unittests/row_helpers.h
new file mode 100644
index 0000000000..fdc3f8b560
--- /dev/null
+++ b/yt/yt/library/formats/unittests/row_helpers.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <yt/yt/client/table_client/unversioned_row.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/value_consumer.h>
+
+#include <vector>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Value consumer that keeps every completed row in memory for later inspection.
+//
+// OnValue() appends values to an owning row builder; OnEndRow() finishes the row and
+// stores it. Unknown columns are always reported as allowed.
+class TCollectingValueConsumer
+    : public NTableClient::IValueConsumer
+{
+public:
+    // Uses |schema| (default-constructed when omitted) and a freshly created name table.
+    explicit TCollectingValueConsumer(NTableClient::TTableSchemaPtr schema = New<NTableClient::TTableSchema>())
+        : Schema_(std::move(schema))
+    { }
+
+    // Uses the caller-supplied |nameTable| instead of a fresh one.
+    explicit TCollectingValueConsumer(NTableClient::TNameTablePtr nameTable, NTableClient::TTableSchemaPtr schema = New<NTableClient::TTableSchema>())
+        : Schema_(std::move(schema))
+        , NameTable_(std::move(nameTable))
+    { }
+
+    const NTableClient::TNameTablePtr& GetNameTable() const override
+    {
+        return NameTable_;
+    }
+
+    const NTableClient::TTableSchemaPtr& GetSchema() const override
+    {
+        return Schema_;
+    }
+
+    bool GetAllowUnknownColumns() const override
+    {
+        return true;
+    }
+
+    // Nothing to prepare: the builder is ready for the next row right after FinishRow().
+    void OnBeginRow() override
+    { }
+
+    void OnValue(const NTableClient::TUnversionedValue& value) override
+    {
+        Builder_.AddValue(value);
+    }
+
+    void OnEndRow() override
+    {
+        RowList_.emplace_back(Builder_.FinishRow());
+    }
+
+    // Returns a non-owning view of the stored row; std::vector::at() throws
+    // std::out_of_range for a bad |rowIndex|. Const so that it is usable on const consumers,
+    // like the other accessors.
+    NTableClient::TUnversionedRow GetRow(size_t rowIndex) const
+    {
+        return RowList_.at(rowIndex);
+    }
+
+    // Looks up the value of |columnName| in the given row; std::nullopt when the row has
+    // no value with that column id. The id is resolved with GetIdOrThrow(), so a name missing
+    // from the name table is reported by that call rather than as std::nullopt.
+    std::optional<NTableClient::TUnversionedValue> FindRowValue(size_t rowIndex, TStringBuf columnName) const
+    {
+        NTableClient::TUnversionedRow row = RowList_.at(rowIndex);
+        auto id = GetNameTable()->GetIdOrThrow(columnName);
+
+        for (const auto& value : row) {
+            if (value.Id == id) {
+                return value;
+            }
+        }
+        return std::nullopt;
+    }
+
+    // Same as FindRowValue() but raises an error when the row lacks the column.
+    NTableClient::TUnversionedValue GetRowValue(size_t rowIndex, TStringBuf columnName) const
+    {
+        auto value = FindRowValue(rowIndex, columnName);
+        if (!value) {
+            THROW_ERROR_EXCEPTION("Cannot find column %Qv", columnName);
+        }
+        return *value;
+    }
+
+    // Number of rows collected so far.
+    size_t Size() const
+    {
+        return RowList_.size();
+    }
+
+    const std::vector<NTableClient::TUnversionedOwningRow>& GetRowList() const {
+        return RowList_;
+    }
+
+private:
+    const NTableClient::TTableSchemaPtr Schema_;
+    const NTableClient::TNameTablePtr NameTable_ = New<NTableClient::TNameTable>();
+    // Accumulates the values of the row currently being consumed.
+    NTableClient::TUnversionedOwningRowBuilder Builder_;
+    // Completed rows, in arrival order.
+    std::vector<NTableClient::TUnversionedOwningRow> RowList_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Typed accessors for TUnversionedValue. Each Get* helper raises an error when the value's
+// type tag differs from the requested one (see EnsureTypesMatch in row_helpers.cpp).
+i64 GetInt64(const NTableClient::TUnversionedValue& value);
+ui64 GetUint64(const NTableClient::TUnversionedValue& value);
+double GetDouble(const NTableClient::TUnversionedValue& value);
+bool GetBoolean(const NTableClient::TUnversionedValue& value);
+TString GetString(const NTableClient::TUnversionedValue& value);
+// GetAny / GetComposite convert the value's string payload to a node.
+NYTree::INodePtr GetAny(const NTableClient::TUnversionedValue& value);
+NYTree::INodePtr GetComposite(const NTableClient::TUnversionedValue& value);
+// Pure type-tag check; does not throw.
+bool IsNull(const NTableClient::TUnversionedValue& value);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/library/formats/unittests/schemaful_dsv_parser_ut.cpp b/yt/yt/library/formats/unittests/schemaful_dsv_parser_ut.cpp
new file mode 100644
index 0000000000..875ad5b9f1
--- /dev/null
+++ b/yt/yt/library/formats/unittests/schemaful_dsv_parser_ut.cpp
@@ -0,0 +1,248 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/test_framework/yson_consumer_mock.h>
+
+#include <yt/yt/library/formats/schemaful_dsv_parser.h>
+
+#include <yt/yt/core/yson/null_consumer.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+
+using ::testing::InSequence;
+using ::testing::StrictMock;
+using ::testing::NiceMock;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Two rows are parsed against columns {a, b}; the escaped "\\t" inside the second row
+// is expected to reach the consumer as a real tab character.
+TEST(TSchemafulDsvParserTest, Simple)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b"};
+
+    TString data =
+        "5\t6\n"
+        "100\tmax\\tignat\n";
+
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    // First row.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("5"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar("6"));
+    EXPECT_CALL(consumer, OnEndMap());
+    // Second row.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("100"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar("max\tignat"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// With EnableTableIndex the leading field of every row is the table index. A
+// <table_index=N> entity is expected before the first and second rows; the third row
+// repeats index 0 and gets no entity of its own.
+TEST(TSchemafulDsvParserTest, TableIndex)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a"};
+    config->EnableTableIndex = true;
+
+    TString data =
+        "1\tx\n"
+        "0\ty\n"
+        "0\tz\n";
+
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    // Switch to table 1.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginAttributes());
+    EXPECT_CALL(consumer, OnKeyedItem("table_index"));
+    EXPECT_CALL(consumer, OnInt64Scalar(1));
+    EXPECT_CALL(consumer, OnEndAttributes());
+    EXPECT_CALL(consumer, OnEntity());
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("x"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    // Switch to table 0.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginAttributes());
+    EXPECT_CALL(consumer, OnKeyedItem("table_index"));
+    EXPECT_CALL(consumer, OnInt64Scalar(0));
+    EXPECT_CALL(consumer, OnEndAttributes());
+    EXPECT_CALL(consumer, OnEntity());
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("y"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    // Still table 0: the row follows directly.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("z"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+// Despite the test name, the input is a single row that carries two fields while only
+// one column is configured; parsing is expected to throw.
+TEST(TSchemafulDsvParserTest, TooManyRows)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a"};
+
+    TString data = "5\t6\n";
+
+    EXPECT_THROW(ParseSchemafulDsv(data, GetNullYsonConsumer(), config), std::exception);
+}
+
+// A carriage return and a NUL byte inside fields are expected to be delivered as
+// ordinary value bytes.
+TEST(TSchemafulDsvParserTest, SpecialSymbols)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b"};
+
+    // Explicit lengths keep the embedded NUL bytes inside the strings.
+    TString data("5\r\t6\0\n", 6);
+    auto expectedB = TString("6\0", 2);
+
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("5\r"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar(expectedB));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+// With escaping enabled the two-character sequence backslash + 'r' in the input is
+// expected to be decoded into a carriage return.
+TEST(TSchemafulDsvParserTest, EnabledEscaping)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b"};
+    config->EnableEscaping = true;
+
+    // Explicit lengths keep the embedded NUL bytes inside the strings.
+    TString data("5\r\\r\t6\0\n", 8);
+    auto expectedB = TString("6\0", 2);
+
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("5\r\r"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar(expectedB));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+// With escaping disabled the backslash + 'r' pair in the input is expected to be passed
+// through literally.
+TEST(TSchemafulDsvParserTest, DisabledEscaping)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b"};
+    config->EnableEscaping = false;
+
+    // Explicit lengths keep the embedded NUL bytes inside the strings.
+    TString data("5\r\\r\t6\0\n", 8);
+    auto expectedB = TString("6\0", 2);
+
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("5\r\\r"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar(expectedB));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+// Parsing with EnableColumnNamesHeader switched on is expected to throw.
+TEST(TSchemafulDsvParserTest, ColumnsNamesHeader)
+{
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b"};
+    config->EnableColumnNamesHeader = true;
+
+    TString data("a\tb\n1\t2\n");
+
+    EXPECT_THROW(ParseSchemafulDsv(data, GetNullYsonConsumer(), config), std::exception);
+}
+
+// Runs the parser three times over the same mock: with the default missing-value mode,
+// with PrintSentinel and the default sentinel, and with PrintSentinel and sentinel "NULL".
+TEST(TSchemafulDsvParserTest, MissingValueModePrintSentinel)
+{
+    StrictMock<TMockYsonConsumer> consumer;
+    InSequence sequence;
+
+    TString data = "x\t\tz\n";
+
+    auto config = New<TSchemafulDsvFormatConfig>();
+    config->Columns = {"a", "b", "c"};
+    // By default missing_value_mode = fail and no sentinel values are used,
+    // i.e. there is no way to represent YSON entity with this format.
+
+    // Phase 1: the empty field is an ordinary empty string.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("x"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnStringScalar(""));
+    EXPECT_CALL(consumer, OnKeyedItem("c"));
+    EXPECT_CALL(consumer, OnStringScalar("z"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+
+    // Phase 2: the empty field now matches the sentinel and becomes an entity.
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("x"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnEntity());
+    EXPECT_CALL(consumer, OnKeyedItem("c"));
+    EXPECT_CALL(consumer, OnStringScalar("z"));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    config->MissingValueMode = EMissingSchemafulDsvValueMode::PrintSentinel;
+    // By default missing_value_sentinel = "".
+
+    ParseSchemafulDsv(data, &consumer, config);
+
+    // Phase 3: only the exact sentinel "NULL" becomes an entity; "null" and "" stay strings.
+    data = "null\tNULL\t\n";
+
+    config->MissingValueSentinel = "NULL";
+
+    EXPECT_CALL(consumer, OnListItem());
+    EXPECT_CALL(consumer, OnBeginMap());
+    EXPECT_CALL(consumer, OnKeyedItem("a"));
+    EXPECT_CALL(consumer, OnStringScalar("null"));
+    EXPECT_CALL(consumer, OnKeyedItem("b"));
+    EXPECT_CALL(consumer, OnEntity());
+    EXPECT_CALL(consumer, OnKeyedItem("c"));
+    EXPECT_CALL(consumer, OnStringScalar(""));
+    EXPECT_CALL(consumer, OnEndMap());
+
+    ParseSchemafulDsv(data, &consumer, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/schemaful_dsv_writer_ut.cpp b/yt/yt/library/formats/unittests/schemaful_dsv_writer_ut.cpp
new file mode 100644
index 0000000000..52cd31a1a8
--- /dev/null
+++ b/yt/yt/library/formats/unittests/schemaful_dsv_writer_ut.cpp
@@ -0,0 +1,346 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include "format_writer_ut.h"
+
+#include <yt/yt/library/formats/schemaful_dsv_writer.h>
+#include <yt/yt/library/formats/format.h>
+
+#include <yt/yt/client/table_client/name_table.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+
+#include <limits>
+
+namespace NYT::NFormats {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYTree;
+using namespace NYson;
+using namespace NConcurrency;
+using namespace NTableClient;
+
+// Fixture for the schemaful DSV writer tests. It owns a name table with four data columns
+// and the table/row/range index columns, a format config the tests tweak, and the string
+// stream the writer under test prints into.
+class TSchemalessWriterForSchemafulDsvTest
+    : public ::testing::Test
+{
+protected:
+    TNameTablePtr NameTable_;
+    int KeyAId_;
+    int KeyBId_;
+    int KeyCId_;
+    int KeyDId_;
+    int TableIndexId_;
+    int RangeIndexId_;
+    int RowIndexId_;
+    TSchemafulDsvFormatConfigPtr Config_;
+
+    ISchemalessFormatWriterPtr Writer_;
+
+    TStringStream OutputStream_;
+
+    TSchemalessWriterForSchemafulDsvTest()
+        : NameTable_(New<TNameTable>())
+        , Config_(New<TSchemafulDsvFormatConfig>())
+    {
+        // Registration order is kept as is: it determines the assigned column ids.
+        KeyAId_ = NameTable_->RegisterName("column_a");
+        KeyBId_ = NameTable_->RegisterName("column_b");
+        KeyCId_ = NameTable_->RegisterName("column_c");
+        KeyDId_ = NameTable_->RegisterName("column_d");
+        TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
+        RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName);
+        RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
+    }
+
+    // (Re)creates Writer_ from the current Config_; the control attributes mirror
+    // Config_->EnableTableIndex.
+    void CreateStandardWriter()
+    {
+        auto controlAttributes = New<TControlAttributesConfig>();
+        controlAttributes->EnableTableIndex = Config_->EnableTableIndex;
+
+        auto output = CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_));
+
+        Writer_ = CreateSchemalessWriterForSchemafulDsv(
+            Config_,
+            NameTable_,
+            output,
+            /*enableContextSaving*/ false,
+            controlAttributes,
+            /*keyColumnCount*/ 0);
+    }
+};
+
+// Values are printed in the order of Config_->Columns regardless of their order inside
+// the row; column_d (not listed) and the system columns do not appear in the output.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, Simple)
+{
+    Config_->Columns = {"column_b", "column_c", "column_a"};
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder firstRow;
+    firstRow.AddValue(MakeUnversionedStringValue("value_a", KeyAId_));
+    firstRow.AddValue(MakeUnversionedInt64Value(-42, KeyBId_));
+    firstRow.AddValue(MakeUnversionedBooleanValue(true, KeyCId_));
+    firstRow.AddValue(MakeUnversionedStringValue("garbage", KeyDId_));
+
+    // Ignore system columns.
+    firstRow.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
+    firstRow.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
+    firstRow.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
+
+    TUnversionedRowBuilder secondRow;
+    // The order is reversed.
+    secondRow.AddValue(MakeUnversionedStringValue("value_c", KeyCId_));
+    secondRow.AddValue(MakeUnversionedBooleanValue(false, KeyBId_));
+    secondRow.AddValue(MakeUnversionedInt64Value(23, KeyAId_));
+
+    std::vector<TUnversionedRow> batch = {firstRow.GetRow(), secondRow.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expected =
+        "-42\ttrue\tvalue_a\n"
+        "false\tvalue_c\t23\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+// This test pins the writer's current textual form of doubles. It is OK to change it in the future. :)
+TEST_F(TSchemalessWriterForSchemafulDsvTest, TrickyDoubleRepresentations)
+{
+    Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row;
+    row.AddValue(MakeUnversionedDoubleValue(1.234567890123456, KeyAId_));
+    row.AddValue(MakeUnversionedDoubleValue(42, KeyBId_));
+    row.AddValue(MakeUnversionedDoubleValue(1e300, KeyCId_));
+    row.AddValue(MakeUnversionedDoubleValue(-1e-300, KeyDId_));
+
+    std::vector<TUnversionedRow> batch = {row.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expected = "1.234567890123456\t42.\t1e+300\t-1e-300\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+// Prints small, negative and extreme signed/unsigned integers and checks their decimal form.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, IntegralTypeRepresentations)
+{
+    Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder smallSigned;
+    smallSigned.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
+    smallSigned.AddValue(MakeUnversionedInt64Value(-1LL, KeyBId_));
+    smallSigned.AddValue(MakeUnversionedInt64Value(1LL, KeyCId_));
+    smallSigned.AddValue(MakeUnversionedInt64Value(99LL, KeyDId_));
+
+    TUnversionedRowBuilder mediumSigned;
+    mediumSigned.AddValue(MakeUnversionedInt64Value(123LL, KeyAId_));
+    mediumSigned.AddValue(MakeUnversionedInt64Value(-123LL, KeyBId_));
+    mediumSigned.AddValue(MakeUnversionedInt64Value(1234LL, KeyCId_));
+    mediumSigned.AddValue(MakeUnversionedInt64Value(-1234LL, KeyDId_));
+
+    TUnversionedRowBuilder smallUnsigned;
+    smallUnsigned.AddValue(MakeUnversionedUint64Value(0ULL, KeyAId_));
+    smallUnsigned.AddValue(MakeUnversionedUint64Value(98ULL, KeyBId_));
+    smallUnsigned.AddValue(MakeUnversionedUint64Value(987ULL, KeyCId_));
+    smallUnsigned.AddValue(MakeUnversionedUint64Value(9876ULL, KeyDId_));
+
+    TUnversionedRowBuilder extremes;
+    extremes.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::max(), KeyAId_));
+    extremes.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::min(), KeyBId_));
+    extremes.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::min() + 1LL, KeyCId_));
+    extremes.AddValue(MakeUnversionedUint64Value(std::numeric_limits<ui64>::max(), KeyDId_));
+
+    std::vector<TUnversionedRow> batch = {
+        smallSigned.GetRow(),
+        mediumSigned.GetRow(),
+        smallUnsigned.GetRow(),
+        extremes.GetRow(),
+    };
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expected =
+        "0\t-1\t1\t99\n"
+        "123\t-123\t1234\t-1234\n"
+        "0\t98\t987\t9876\n"
+        "9223372036854775807\t-9223372036854775808\t-9223372036854775807\t18446744073709551615\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+// With no configured columns a row is still written, as a bare newline.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, EmptyColumnList)
+{
+    Config_->Columns = std::vector<std::string>();
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row;
+    row.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
+
+    std::vector<TUnversionedRow> batch = {row.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expected = "\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+// Writes the same three rows (the middle one lacks column_b) under each missing-value
+// mode: SkipRow drops the row, Fail reports an error, PrintSentinel prints the sentinel.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, MissingValueMode)
+{
+    Config_->Columns = {"column_a", "column_b", "column_c"};
+
+    TUnversionedRowBuilder fullRow1;
+    fullRow1.AddValue(MakeUnversionedStringValue("Value1A", KeyAId_));
+    fullRow1.AddValue(MakeUnversionedStringValue("Value1B", KeyBId_));
+    fullRow1.AddValue(MakeUnversionedStringValue("Value1C", KeyCId_));
+
+    TUnversionedRowBuilder sparseRow;
+    sparseRow.AddValue(MakeUnversionedStringValue("Value2A", KeyAId_));
+    sparseRow.AddValue(MakeUnversionedStringValue("Value2C", KeyCId_));
+
+    TUnversionedRowBuilder fullRow3;
+    fullRow3.AddValue(MakeUnversionedStringValue("Value3A", KeyAId_));
+    fullRow3.AddValue(MakeUnversionedStringValue("Value3B", KeyBId_));
+    fullRow3.AddValue(MakeUnversionedStringValue("Value3C", KeyCId_));
+
+    std::vector<TUnversionedRow> batch = {
+        fullRow1.GetRow(),
+        sparseRow.GetRow(),
+        fullRow3.GetRow(),
+    };
+
+    // SkipRow: the incomplete row is absent from the output.
+    {
+        Config_->MissingValueMode = EMissingSchemafulDsvValueMode::SkipRow;
+        CreateStandardWriter();
+        EXPECT_TRUE(Writer_->Write(batch));
+        Writer_->Close().Get().ThrowOnError();
+        TString expected =
+            "Value1A\tValue1B\tValue1C\n"
+            "Value3A\tValue3B\tValue3C\n";
+        EXPECT_EQ(expected, OutputStream_.Str());
+        OutputStream_.Clear();
+    }
+
+    // Fail: Write() reports failure and Close() yields an error.
+    {
+        Config_->MissingValueMode = EMissingSchemafulDsvValueMode::Fail;
+        CreateStandardWriter();
+        EXPECT_FALSE(Writer_->Write(batch));
+        EXPECT_THROW(Writer_->Close().Get().ThrowOnError(), std::exception);
+        OutputStream_.Clear();
+    }
+
+    // PrintSentinel: the missing value is replaced by the configured sentinel.
+    {
+        Config_->MissingValueMode = EMissingSchemafulDsvValueMode::PrintSentinel;
+        Config_->MissingValueSentinel = "~";
+        CreateStandardWriter();
+        EXPECT_TRUE(Writer_->Write(batch));
+        Writer_->Close().Get().ThrowOnError();
+        TString expected =
+            "Value1A\tValue1B\tValue1C\n"
+            "Value2A\t~\tValue2C\n"
+            "Value3A\tValue3B\tValue3C\n";
+        EXPECT_EQ(expected, OutputStream_.Str());
+        OutputStream_.Clear();
+    }
+}
+
+// Delegates to the shared TestNameTableExpansion helper with a PrintSentinel writer.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, NameTableExpansion)
+{
+    Config_->MissingValueMode = EMissingSchemafulDsvValueMode::PrintSentinel;
+    Config_->Columns = {"Column1"};
+    CreateStandardWriter();
+
+    TestNameTableExpansion(Writer_, NameTable_);
+}
+
+// With enable_table_index set, every row must carry a table index value, which is printed
+// as the first field of the line. A row without it makes Write() fail.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, TableIndex)
+{
+    Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
+    Config_->EnableTableIndex = true;
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row0;
+    row0.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
+    row0.AddValue(MakeUnversionedInt64Value(1LL, KeyBId_));
+    row0.AddValue(MakeUnversionedInt64Value(2LL, KeyCId_));
+    row0.AddValue(MakeUnversionedInt64Value(3LL, KeyDId_));
+
+    // It's necessary to specify a column corresponding to the table index
+    // when enable_table_index = true.
+    EXPECT_FALSE(Writer_->Write(std::vector<TUnversionedRow>{row0.GetRow()}));
+
+    // Start over with a fresh writer for the positive scenario.
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row1;
+    row1.AddValue(MakeUnversionedInt64Value(42LL, TableIndexId_));
+    row1.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
+    row1.AddValue(MakeUnversionedInt64Value(1LL, KeyBId_));
+    row1.AddValue(MakeUnversionedInt64Value(2LL, KeyCId_));
+    row1.AddValue(MakeUnversionedInt64Value(3LL, KeyDId_));
+
+    TUnversionedRowBuilder row2;
+    row2.AddValue(MakeUnversionedInt64Value(42LL, TableIndexId_));
+    row2.AddValue(MakeUnversionedInt64Value(4LL, KeyAId_));
+    row2.AddValue(MakeUnversionedInt64Value(5LL, KeyBId_));
+    row2.AddValue(MakeUnversionedInt64Value(6LL, KeyCId_));
+    row2.AddValue(MakeUnversionedInt64Value(7LL, KeyDId_));
+
+    EXPECT_TRUE(Writer_->Write(std::vector<TUnversionedRow>{row1.GetRow(), row2.GetRow()}));
+
+    // Unsigned values use unsigned literals throughout (previously a mix of LL and ULL).
+    TUnversionedRowBuilder row3;
+    row3.AddValue(MakeUnversionedInt64Value(23LL, TableIndexId_));
+    row3.AddValue(MakeUnversionedUint64Value(8ULL, KeyAId_));
+    row3.AddValue(MakeUnversionedUint64Value(9ULL, KeyBId_));
+    row3.AddValue(MakeUnversionedUint64Value(10ULL, KeyCId_));
+    row3.AddValue(MakeUnversionedUint64Value(11ULL, KeyDId_));
+
+    EXPECT_TRUE(Writer_->Write(std::vector<TUnversionedRow>{row3.GetRow()}));
+
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+    TString expectedOutput =
+        "42\t0\t1\t2\t3\n"
+        "42\t4\t5\t6\t7\n"
+        "23\t8\t9\t10\t11\n";
+    EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+
+// Listing "column_a" twice must make writer creation fail with TErrorException.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, ValidateDuplicateNames)
+{
+    Config_->EnableTableIndex = true;
+    Config_->Columns = {"column_a", "column_b", "column_a"};
+
+    EXPECT_THROW(CreateStandardWriter(), TErrorException);
+}
+
+// With EnableColumnNamesHeader the configured column names are printed as the first line.
+TEST_F(TSchemalessWriterForSchemafulDsvTest, ColumnsHeader)
+{
+    Config_->Columns = {"column_b", "column_c", "column_a"};
+    Config_->EnableColumnNamesHeader = true;
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row;
+    row.AddValue(MakeUnversionedStringValue("value_a", KeyAId_));
+    row.AddValue(MakeUnversionedInt64Value(-42, KeyBId_));
+    row.AddValue(MakeUnversionedBooleanValue(true, KeyCId_));
+    std::vector<TUnversionedRow> batch = {row.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expected =
+        "column_b\tcolumn_c\tcolumn_a\n"
+        "-42\ttrue\tvalue_a\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/skiff_format_ut.cpp b/yt/yt/library/formats/unittests/skiff_format_ut.cpp
new file mode 100644
index 0000000000..0f5d416bd5
--- /dev/null
+++ b/yt/yt/library/formats/unittests/skiff_format_ut.cpp
@@ -0,0 +1,3028 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
+#include "value_examples.h"
+#include "row_helpers.h"
+#include "yson_helpers.h"
+
+#include <yt/yt/client/formats/config.h>
+#include <yt/yt/client/formats/parser.h>
+#include <yt/yt/library/formats/skiff_parser.h>
+#include <yt/yt/library/formats/skiff_writer.h>
+#include <yt/yt/library/formats/format.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/validate_logical_type.h>
+
+#include <yt/yt/library/named_value/named_value.h>
+#include <yt/yt/library/skiff_ext/schema_match.h>
+
+#include <yt/yt/core/yson/string.h>
+#include <yt/yt/core/ytree/convert.h>
+#include <yt/yt/core/ytree/fluent.h>
+#include <yt/yt/core/ytree/tree_visitor.h>
+
+#include <library/cpp/skiff/skiff.h>
+#include <library/cpp/skiff/skiff_schema.h>
+
+#include <util/stream/null.h>
+#include <util/string/hex.h>
+
+namespace NYT {
+
+namespace {
+
+using namespace NFormats;
+using namespace NNamedValue;
+using namespace NSkiff;
+using namespace NSkiffExt;
+using namespace NTableClient;
+using namespace NYTree;
+using namespace NYson;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Parses |node| as a Skiff format config and renders all resulting table schemas as
+// "{<schema>,<schema>,...,}" using their short debug strings.
+TString ConvertToSkiffSchemaShortDebugString(INodePtr node)
+{
+    auto config = ConvertTo<TSkiffFormatConfigPtr>(std::move(node));
+    auto schemas = ParseSkiffSchemas(config->SkiffSchemaRegistry, config->TableSkiffSchemas);
+
+    TStringStream out;
+    out << '{';
+    for (const auto& schema : schemas) {
+        // Every entry, including the last one, is followed by a comma.
+        out << GetShortDebugString(schema) << ',';
+    }
+    out << '}';
+    return out.Str();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Serializes |node| to text YSON by visiting the tree with a text-format writer.
+TString ConvertToYsonTextStringStable(const INodePtr& node)
+{
+    TStringStream buffer;
+    TYsonWriter ysonWriter(&buffer, EYsonFormat::Text);
+
+    VisitTree(node, &ysonWriter, /*stable*/ true, TAttributeFilter());
+    ysonWriter.Flush();
+
+    return buffer.Str();
+}
+
+// Builds a schema with a single column "value" of |logicalType|. For a null
+// |logicalType| the schema has no columns and is created non-strict; otherwise it is strict.
+TTableSchemaPtr CreateSingleValueTableSchema(const TLogicalTypePtr& logicalType)
+{
+    const auto hasType = static_cast<bool>(logicalType);
+
+    std::vector<TColumnSchema> columns;
+    if (hasType) {
+        columns.emplace_back("value", logicalType);
+    }
+    return New<TTableSchema>(columns, hasType);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Parses several valid Skiff format configs and compares the short debug string of the
+// resulting schema list with the expected text.
+TEST(TSkiffSchemaParse, TestAllowedTypes)
+{
+ // Single simple type: uint64.
+ EXPECT_EQ(
+ "{uint64,}",
+
+ ConvertToSkiffSchemaShortDebugString(
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("table_skiff_schemas")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("uint64")
+ .EndMap()
+ .EndList()
+ .EndMap()));
+
+ // Single simple type: string32.
+ EXPECT_EQ(
+ "{string32,}",
+
+ ConvertToSkiffSchemaShortDebugString(
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("table_skiff_schemas")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("string32")
+ .EndMap()
+ .EndList()
+ .EndMap()));
+
+ // variant8 with inline children (string32, int64).
+ EXPECT_EQ(
+ "{variant8<string32;int64;>,}",
+
+ ConvertToSkiffSchemaShortDebugString(
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("table_skiff_schemas")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("variant8")
+ .Item("children")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("string32")
+ .EndMap()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("int64")
+ .EndMap()
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()));
+
+ // variant8 whose children are "$item1" / "$item2" references into skiff_schema_registry.
+ EXPECT_EQ(
+ "{variant8<int64;string32;>,}",
+
+ ConvertToSkiffSchemaShortDebugString(
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("skiff_schema_registry")
+ .BeginMap()
+ .Item("item1")
+ .BeginMap()
+ .Item("wire_type")
+ .Value("int64")
+ .EndMap()
+ .Item("item2")
+ .BeginMap()
+ .Item("wire_type")
+ .Value("string32")
+ .EndMap()
+ .EndMap()
+ .Item("table_skiff_schemas")
+ .BeginList()
+ .Item()
+ .BeginMap()
+ .Item("wire_type")
+ .Value("variant8")
+ .Item("children")
+ .BeginList()
+ .Item().Value("$item1")
+ .Item().Value("$item2")
+ .EndList()
+ .EndMap()
+ .EndList()
+ .EndMap()));
+}
+
+// A registry entry "item1" that lists "$item1" among its own children must be rejected
+// with an error mentioning that recursive types are forbidden.
+TEST(TSkiffSchemaParse, TestRecursiveTypesAreDisallowed)
+{
+ try {
+ ConvertToSkiffSchemaShortDebugString(
+ BuildYsonNodeFluently()
+ .BeginMap()
+ .Item("skiff_schema_registry")
+ .BeginMap()
+ .Item("item1")
+ .BeginMap()
+ .Item("wire_type")
+ .Value("variant8")
+ .Item("children")
+ .BeginList()
+ .Item().Value("$item1")
+ .EndList()
+ .EndMap()
+ .EndMap()
+ .Item("table_skiff_schemas")
+ .BeginList()
+ .Item().Value("$item1")
+ .EndList()
+ .EndMap());
+ // Reaching this line means no exception was thrown.
+ ADD_FAILURE();
+ } catch (const std::exception& e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr("recursive types are forbidden"));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Derives the table description for a tuple schema with two named fields ("Foo" and the
+// variant8 field "Bar") and checks the flags, the dense field count and the first field.
+TEST(TSkiffSchemaDescription, TestDescriptionDerivation)
+{
+    auto schema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Uint64),
+        })->SetName("Bar"),
+    });
+
+    auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
+    // Fatal assertion: element 0 is accessed below, so a wrong size must stop the test.
+    ASSERT_EQ(std::ssize(tableDescriptionList), 1);
+    EXPECT_EQ(tableDescriptionList[0].HasOtherColumns, false);
+    EXPECT_EQ(tableDescriptionList[0].SparseFieldDescriptionList.empty(), true);
+
+    auto denseFieldDescriptionList = tableDescriptionList[0].DenseFieldDescriptionList;
+    // Fatal for the same reason: element 0 of the dense list is accessed next.
+    ASSERT_EQ(std::ssize(denseFieldDescriptionList), 2);
+
+    EXPECT_EQ(denseFieldDescriptionList[0].Name(), "Foo");
+    EXPECT_EQ(denseFieldDescriptionList[0].ValidatedSimplify(), EWireType::Uint64);
+}
+
+// A Boolean "$key_switch" column is recorded by its field index, while the same column
+// name with a Uint64 wire type is rejected.
+TEST(TSkiffSchemaDescription, TestKeySwitchColumn)
+{
+    {
+        auto schema = CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
+            CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
+        });
+
+        auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
+        // Fatal assertion: element 0 is accessed on the next line.
+        ASSERT_EQ(std::ssize(tableDescriptionList), 1);
+        EXPECT_EQ(tableDescriptionList[0].KeySwitchFieldIndex, std::optional<size_t>(1));
+    }
+    {
+        auto schema = CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$key_switch"),
+        });
+
+        try {
+            auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
+            // Reaching this line means no exception was thrown.
+            ADD_FAILURE();
+        } catch (const std::exception& e) {
+            EXPECT_THAT(e.what(), testing::HasSubstr("Column \"$key_switch\" has unexpected Skiff type"));
+        }
+    }
+}
+
+// A tuple field with an empty name must be rejected ("must have a name").
+TEST(TSkiffSchemaDescription, TestDisallowEmptyNames)
+{
+    auto rowSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName(""),
+    });
+
+    try {
+        CreateTableDescriptionList({rowSchema}, RangeIndexColumnName, RowIndexColumnName);
+        // No exception: the check did not fire.
+        ADD_FAILURE();
+    } catch (const std::exception& ex) {
+        EXPECT_THAT(ex.what(), testing::HasSubstr("must have a name"));
+    }
+}
+
+// A repeated_variant16 schema used as the table row type must be rejected.
+TEST(TSkiffSchemaDescription, TestWrongRowType)
+{
+    auto rowSchema = CreateRepeatedVariant16Schema({
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
+    });
+
+    try {
+        CreateTableDescriptionList({rowSchema}, RangeIndexColumnName, RowIndexColumnName);
+        // No exception: the check did not fire.
+        ADD_FAILURE();
+    } catch (const std::exception& ex) {
+        EXPECT_THAT(ex.what(), testing::HasSubstr("Invalid wire type for table row"));
+    }
+}
+
+// A trailing Yson32 "$other_columns" field is accepted and sets HasOtherColumns.
+TEST(TSkiffSchemaDescription, TestOtherColumnsOk)
+{
+    auto rowSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
+        CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
+        CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
+    });
+
+    auto descriptions = CreateTableDescriptionList({rowSchema}, RangeIndexColumnName, RowIndexColumnName);
+    ASSERT_EQ(std::ssize(descriptions), 1);
+    ASSERT_TRUE(descriptions[0].HasOtherColumns);
+}
+
+ // "$other_columns" declared with a Uint64 wire type (the accepted case above uses Yson32) must be rejected.
+ TEST(TSkiffSchemaDescription, TestOtherColumnsWrongType)
+ {
+     auto fooColumn = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo");
+     auto barColumn = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar");
+     auto otherColumns = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$other_columns");
+     auto rowSchema = CreateTupleSchema({
+         fooColumn,
+         barColumn,
+         otherColumns,
+     });
+
+     try {
+         CreateTableDescriptionList({rowSchema}, RangeIndexColumnName, RowIndexColumnName);
+         // No exception was thrown: report a failure.
+         ADD_FAILURE();
+     } catch (const std::exception& ex) {
+         EXPECT_THAT(ex.what(), testing::HasSubstr("Invalid wire type for column \"$other_columns\""));
+     }
+ }
+
+ // "$other_columns" placed before a regular column (i.e. not last) must be rejected.
+ TEST(TSkiffSchemaDescription, TestOtherColumnsWrongPlace)
+ {
+     auto fooColumn = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo");
+     auto otherColumns = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$other_columns");
+     auto barColumn = CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar");
+     auto rowSchema = CreateTupleSchema({
+         fooColumn,
+         otherColumns,
+         barColumn,
+     });
+
+     try {
+         CreateTableDescriptionList({rowSchema}, RangeIndexColumnName, RowIndexColumnName);
+         // No exception was thrown: report a failure.
+         ADD_FAILURE();
+     } catch (const std::exception& ex) {
+         EXPECT_THAT(ex.what(), testing::HasSubstr("Invalid placement of special column \"$other_columns\""));
+     }
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Test helper: builds a Skiff writer for a single Skiff table schema on top of a synchronous stream.
+ //
+ // Key switch control attributes are enabled iff |keyColumnCount| is positive;
+ // end-of-stream control attributes follow |enableEndOfStream|.
+ ISchemalessFormatWriterPtr CreateSkiffWriter(
+     std::shared_ptr<TSkiffSchema> skiffSchema,
+     TNameTablePtr nameTable,
+     IOutputStream* outputStream,
+     const std::vector<TTableSchemaPtr>& tableSchemaList,
+     int keyColumnCount = 0,
+     bool enableEndOfStream = false)
+ {
+     const bool hasKeyColumns = keyColumnCount > 0;
+
+     auto controlAttributes = New<TControlAttributesConfig>();
+     controlAttributes->EnableEndOfStream = enableEndOfStream;
+     controlAttributes->EnableKeySwitch = hasKeyColumns;
+
+     // The writer expects an asynchronous stream, so the synchronous one is wrapped.
+     auto asyncOutput = NConcurrency::CreateAsyncAdapter(outputStream);
+
+     return CreateWriterForSkiff(
+         {std::move(skiffSchema)},
+         std::move(nameTable),
+         tableSchemaList,
+         std::move(asyncOutput),
+         false,
+         controlAttributes,
+         keyColumnCount);
+ }
+
+ // Test helper: serializes a single |value| of |logicalType| as a one-column ("value") Skiff row
+ // and returns the serialized bytes without the two leading zero bytes.
+ // Throws if the writer output does not start with those two zero bytes.
+ TString TableToSkiff(
+     const TLogicalTypePtr& logicalType,
+     const std::shared_ptr<TSkiffSchema>& typeSchema,
+     const TNamedValue::TValue& value)
+ {
+     auto nameTable = New<TNameTable>();
+     auto tableSchema = CreateSingleValueTableSchema(logicalType);
+     auto rowSkiffSchema = CreateTupleSchema({
+         typeSchema->SetName("value")
+     });
+
+     TStringStream output;
+     auto writer = CreateSkiffWriter(rowSkiffSchema, nameTable, &output, {tableSchema});
+
+     auto row = MakeRow(nameTable, {
+         {"value", value}
+     });
+     Y_UNUSED(writer->Write({
+         row.Get(),
+     }));
+     writer->Close()
+         .Get()
+         .ThrowOnError();
+
+     const auto serialized = output.Str();
+     const auto rowPrefix = TString(2, '\0');
+     if (!TStringBuf(serialized).StartsWith(rowPrefix)) {
+         THROW_ERROR_EXCEPTION("Expected skiff value to start with \\x00\\x00, but prefix is %Qv",
+             EscapeC(serialized.substr(0, 2)));
+     }
+
+     // Only the payload that follows the two-byte prefix is of interest to callers.
+     return serialized.substr(2);
+ }
+
+ // Test helper, inverse of TableToSkiff: parses |skiffValue| as the single "value" column of one
+ // Skiff row and returns the parsed value.
+ //
+ // |skiffValue| must not include the two leading zero bytes of the row; they are fed to the parser here.
+ // Throws if the parser does not produce exactly one row.
+ TNamedValue::TValue SkiffToTable(
+     const TLogicalTypePtr& logicalType,
+     const std::shared_ptr<TSkiffSchema>& typeSchema,
+     const TString& skiffValue)
+ {
+     auto schema = CreateSingleValueTableSchema(logicalType);
+     auto skiffSchema = CreateTupleSchema({
+         typeSchema->SetName("value")
+     });
+
+     TCollectingValueConsumer rowCollector(schema);
+     auto parser = CreateParserForSkiff(skiffSchema, &rowCollector);
+     // Restore the two-byte prefix that TableToSkiff strips from its result.
+     parser->Read(TString(2, 0));
+     parser->Read(skiffValue);
+     parser->Finish();
+
+     if (rowCollector.Size() != 1) {
+         THROW_ERROR_EXCEPTION("Expected 1 row collected, actual %v",
+             rowCollector.Size());
+     }
+     auto value = rowCollector.GetRowValue(0, "value");
+     return TNamedValue::ExtractValue(value);
+ }
+
+ // Checks a table value <-> Skiff conversion in both directions:
+ //   - |tableValueArg| of type |logicalTypeArg| written with |skiffSchemaArg| must hex-encode to |hexSkiffArg|;
+ //   - the bytes decoded from |hexSkiffArg| must parse back into |tableValueArg|.
+ // Each argument is evaluated exactly once. Any std::exception escaping the conversions
+ // is reported as a non-fatal test failure.
+ #define CHECK_BIDIRECTIONAL_CONVERSION(logicalTypeArg, skiffSchemaArg, tableValueArg, hexSkiffArg) \
+     do { \
+         try { \
+             TLogicalTypePtr logicalType = (logicalTypeArg); \
+             std::shared_ptr<TSkiffSchema> skiffSchema = (skiffSchemaArg); \
+             TNamedValue::TValue tableValue = (tableValueArg); \
+             TString hexSkiff = (hexSkiffArg); \
+             auto actualSkiff = TableToSkiff(logicalType, skiffSchema, tableValue); \
+             EXPECT_EQ(HexEncode(actualSkiff), hexSkiff); \
+             auto actualValue = SkiffToTable(logicalType, skiffSchema, HexDecode(hexSkiff)); \
+             EXPECT_EQ(actualValue, tableValue); \
+         } catch (const std::exception& ex) { \
+             ADD_FAILURE() << "unexpected exception: " << ex.what(); \
+         } \
+     } while (0)
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Writes two rows covering the simple wire types (required and optional variants) through the
+ // Skiff writer and verifies the produced bytes field by field with TCheckedSkiffParser.
+ //
+ // |useSchema|: when true, a typed table schema matching the Skiff columns is supplied;
+ // otherwise an empty TTableSchema is used.
+ void TestAllWireTypes(bool useSchema)
+ {
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
+ CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
+ CreateSimpleTypeSchema(EWireType::Double)->SetName("double_1"),
+ CreateSimpleTypeSchema(EWireType::Double)->SetName("double_2"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("boolean"),
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
+ CreateSimpleTypeSchema(EWireType::Nothing)->SetName("null"),
+
+ // Optional columns are modelled as variant8<nothing, T>.
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ })->SetName("opt_int64"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Uint64),
+ })->SetName("opt_uint64"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Double),
+ })->SetName("opt_double_1"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Double),
+ })->SetName("opt_double_2"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ })->SetName("opt_boolean"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::String32),
+ })->SetName("opt_string32"),
+ });
+ std::vector<TTableSchemaPtr> tableSchemas;
+ if (useSchema) {
+ // The *_2 double columns are declared as Float in the table schema while the Skiff wire type is Double.
+ tableSchemas.push_back(New<TTableSchema>(std::vector{
+ TColumnSchema("int64", EValueType::Int64),
+ TColumnSchema("uint64", EValueType::Uint64),
+ TColumnSchema("double_1", EValueType::Double),
+ TColumnSchema("double_2", ESimpleLogicalValueType::Float),
+ TColumnSchema("boolean", EValueType::Boolean),
+ TColumnSchema("string32", EValueType::String),
+ TColumnSchema("null", EValueType::Null),
+ TColumnSchema("opt_int64", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))),
+ TColumnSchema("opt_uint64", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))),
+ TColumnSchema("opt_double_1", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Double))),
+ TColumnSchema("opt_double_2", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Float))),
+ TColumnSchema("opt_boolean", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean))),
+ TColumnSchema("opt_string32", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))),
+ }));
+ } else {
+ tableSchemas.push_back(New<TTableSchema>());
+ }
+ auto nameTable = New<TNameTable>();
+ TString result;
+ {
+ TStringOutput resultStream(result);
+ auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, tableSchemas);
+
+ // Row 0: every optional column carries a value.
+ auto isWriterReady = writer->Write({
+ MakeRow(nameTable, {
+ {"int64", -1},
+ {"uint64", 2u},
+ {"double_1", 3.0},
+ {"double_2", 3.0},
+ {"boolean", true},
+ {"string32", "four"},
+ {"null", nullptr},
+
+ {"opt_int64", -5},
+ {"opt_uint64", 6u},
+ {"opt_double_1", 7.0},
+ {"opt_double_2", 7.0},
+ {"opt_boolean", false},
+ {"opt_string32", "eight"},
+ {TString(TableIndexColumnName), 0},
+ }).Get(),
+ });
+ if (!isWriterReady) {
+ writer->GetReadyEvent().Get().ThrowOnError();
+ }
+
+ // Row 1: every optional column is null.
+ Y_UNUSED(writer->Write({
+ MakeRow(nameTable, {
+ {"int64", -9},
+ {"uint64", 10u},
+ {"double_1", 11.0},
+ {"double_2", 11.0},
+ {"boolean", false},
+ {"string32", "twelve"},
+ {"null", nullptr},
+
+ {"opt_int64", nullptr},
+ {"opt_uint64", nullptr},
+ {"opt_double_1", nullptr},
+ {"opt_double_2", nullptr},
+ {"opt_boolean", nullptr},
+ {"opt_string32", nullptr},
+ {TString(TableIndexColumnName), 0},
+ }).Get()
+ }));
+
+ writer->Close()
+ .Get()
+ .ThrowOnError();
+ }
+
+ // The parse calls below must follow the column order of skiffSchema exactly.
+ // No parse call is issued for the "null" column (wire type Nothing).
+ TStringInput resultInput(result);
+ TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
+
+ // row 0
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(checkedSkiffParser.ParseInt64(), -1);
+ ASSERT_EQ(checkedSkiffParser.ParseUint64(), 2u);
+ // double_1
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 3.0);
+ // double_2
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 3.0);
+ ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
+ ASSERT_EQ(checkedSkiffParser.ParseString32(), "four");
+
+ // Optional columns: variant8 tag 1 means a value follows.
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseInt64(), -5);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseUint64(), 6u);
+
+ // double_1
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 7.0);
+
+ // double_2
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 7.0);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(checkedSkiffParser.ParseString32(), "eight");
+
+ // row 1
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(checkedSkiffParser.ParseInt64(), -9);
+ ASSERT_EQ(checkedSkiffParser.ParseUint64(), 10u);
+ // double_1
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 11.0);
+ // double_2
+ ASSERT_EQ(checkedSkiffParser.ParseDouble(), 11.0);
+ ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+ ASSERT_EQ(checkedSkiffParser.ParseString32(), "twelve");
+
+ // Optional columns: variant8 tag 0 means the value is absent.
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+ // double_1
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+ // double_2
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+
+ // end
+ ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
+ checkedSkiffParser.ValidateFinished();
+ }
+
+ // Runs the all-wire-types check with an empty table schema (useSchema = false).
+ TEST(TSkiffWriter, TestAllWireTypesNoSchema)
+ {
+ TestAllWireTypes(false);
+ }
+
+ // Runs the all-wire-types check with a typed table schema (useSchema = true).
+ TEST(TSkiffWriter, TestAllWireTypesWithSchema)
+ {
+ TestAllWireTypes(true);
+ }
+
+ // Parameterized fixture for writing values through the Yson32 wire type.
+ // Parameter: (logical type of the column or null for "no schema", value to write, expected YSON).
+ class TSkiffYsonWireTypeP
+     : public ::testing::TestWithParam<std::tuple<
+         TLogicalTypePtr,
+         TNamedValue::TValue,
+         TString
+     >>
+ {
+ public:
+     // Builds the list of test parameters.
+     static std::vector<ParamType> GetCases()
+     {
+         using namespace NLogicalTypeShortcuts;
+
+         std::vector<ParamType> cases;
+
+         // Every primitive example is checked twice: with its logical type and without any type.
+         for (const auto& example : GetPrimitiveValueExamples()) {
+             cases.emplace_back(example.LogicalType, example.Value, example.PrettyYson);
+             cases.emplace_back(nullptr, example.Value, example.PrettyYson);
+         }
+
+         // A null value of every optional simple type must be rendered as "#".
+         for (const auto simpleType : TEnumTraits<ESimpleLogicalValueType>::GetDomainValues()) {
+             auto optionalType = OptionalLogicalType(SimpleLogicalType(simpleType));
+             // Optional<Null> is not v1 type, such types are skipped.
+             if (!IsV3Composite(optionalType)) {
+                 cases.emplace_back(optionalType, nullptr, "#");
+             }
+         }
+
+         return cases;
+     }
+
+     static const std::vector<ParamType> Cases;
+ };
+
+ // The case list is built once, when this static member is initialized.
+ const std::vector<TSkiffYsonWireTypeP::ParamType> TSkiffYsonWireTypeP::Cases = TSkiffYsonWireTypeP::GetCases();
+
+ // Instantiates the parameterized suite over every generated case.
+ INSTANTIATE_TEST_SUITE_P(
+ Cases,
+ TSkiffYsonWireTypeP,
+ ::testing::ValuesIn(TSkiffYsonWireTypeP::Cases));
+
+ // Writes a single value into a Yson32 Skiff column, checks the emitted YSON against the
+ // expected one (after canonization) and then parses the bytes back into the original value.
+ TEST_P(TSkiffYsonWireTypeP, Test)
+ {
+     const auto& [logicalType, value, expectedYson] = GetParam();
+
+     // A null logical type selects an empty table schema.
+     TTableSchemaPtr tableSchema = logicalType
+         ? New<TTableSchema>(std::vector<TColumnSchema>{
+             TColumnSchema("column", logicalType),
+         })
+         : New<TTableSchema>();
+
+     auto rowSkiffSchema = CreateTupleSchema({
+         CreateSimpleTypeSchema(EWireType::Yson32)->SetName("column"),
+     });
+     auto nameTable = New<TNameTable>();
+
+     TStringStream skiffStream;
+     auto writer = CreateSkiffWriter(rowSkiffSchema, nameTable, &skiffStream, {tableSchema});
+     Y_UNUSED(writer->Write({
+         MakeRow(nameTable, {{"column", value}})
+     }));
+     writer->Close()
+         .Get()
+         .ThrowOnError();
+
+     // Direction 1: inspect the raw writer output.
+     auto skiffData = skiffStream.Str();
+     {
+         TMemoryInput input(skiffData);
+         TCheckedSkiffParser checkedParser(CreateVariant16Schema({rowSkiffSchema}), &input);
+         EXPECT_EQ(checkedParser.ParseVariant16Tag(), 0);
+         auto actualYson = checkedParser.ParseYson32();
+         checkedParser.ValidateFinished();
+
+         EXPECT_EQ(CanonizeYson(actualYson), CanonizeYson(expectedYson));
+     }
+
+     // Direction 2: feed the same bytes to the Skiff parser and compare the collected value.
+     TCollectingValueConsumer collector(nameTable);
+     auto parser = CreateParserForSkiff(rowSkiffSchema, tableSchema, &collector);
+     parser->Read(skiffStream.Str());
+     parser->Finish();
+     auto parsedValue = collector.GetRowValue(0, "column");
+     EXPECT_EQ(parsedValue, TNamedValue("column", value).ToUnversionedValue(nameTable));
+ }
+
+ // Writes rows of different value types into a required and an optional Yson32 column
+ // (with an empty table schema) and checks the YSON nodes read back from the output.
+ TEST(TSkiffWriter, TestYsonWireType)
+ {
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson32"),
+
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Yson32),
+ })->SetName("opt_yson32"),
+ });
+ auto nameTable = New<TNameTable>();
+ TString result;
+ {
+ TStringOutput resultStream(result);
+ auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()});
+
+ // Writes one row; if the writer reports it is not ready, waits for the ready event.
+ auto write = [&] (TUnversionedRow row) {
+ if (!writer->Write({row})) {
+ writer->GetReadyEvent().Get().ThrowOnError();
+ }
+ };
+
+ // Row 0 (Null)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", nullptr},
+ {"opt_yson32", nullptr},
+ }).Get(),
+ });
+
+ // Row 1 (Int64)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", -5},
+ {"opt_yson32", -6},
+ }).Get(),
+ });
+
+ // Row 2 (Uint64)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", 42u},
+ {"opt_yson32", 43u},
+ }).Get(),
+ });
+
+ // Row 3 (Double)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", 2.7182818},
+ {"opt_yson32", 3.1415926},
+ }).Get(),
+ });
+
+ // Row 4 (Boolean)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", true},
+ {"opt_yson32", false},
+ }).Get(),
+ });
+
+ // Row 5 (String)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", "Yin"},
+ {"opt_yson32", "Yang"},
+ }).Get(),
+ });
+
+ // Row 6 (Any)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+
+ {"yson32", EValueType::Any, "{foo=bar;}"},
+ {"opt_yson32", EValueType::Any, "{bar=baz;}"},
+ }).Get(),
+ });
+
+ // Row 7 (missing optional values)
+ write({
+ MakeRow(nameTable, {
+ {TString(TableIndexColumnName), 0},
+ }).Get(),
+ });
+
+ writer->Close()
+ .Get()
+ .ThrowOnError();
+ }
+
+ TStringInput resultInput(result);
+ TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
+
+ // Reads the next Yson32 field and converts it into a YSON node for inspection.
+ auto parseYson = [] (TCheckedSkiffParser* parser) {
+ auto yson = TString{parser->ParseYson32()};
+ return ConvertToNode(TYsonString(yson));
+ };
+
+ // Row 0 (Null)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->GetType(), ENodeType::Entity);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+
+ // Row 1 (Int64)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsInt64()->GetValue(), -5);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsInt64()->GetValue(), -6);
+
+ // Row 2 (Uint64)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsUint64()->GetValue(), 42u);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsUint64()->GetValue(), 43u);
+
+ // Row 3 (Double)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsDouble()->GetValue(), 2.7182818);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsDouble()->GetValue(), 3.1415926);
+
+ // Row 4 (Boolean)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsBoolean()->GetValue(), true);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsBoolean()->GetValue(), false);
+
+ // Row 5 (String)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsString()->GetValue(), "Yin");
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsString()->GetValue(), "Yang");
+
+ // Row 6 (Any)
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsMap()->GetChildOrThrow("foo")->AsString()->GetValue(), "bar");
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->AsMap()->GetChildOrThrow("bar")->AsString()->GetValue(), "baz");
+
+ // Row 7 (missing values): the required column reads back as an entity, the optional one is absent.
+ ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+ ASSERT_EQ(parseYson(&checkedSkiffParser)->GetType(), ENodeType::Entity);
+
+ ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
+
+ // end
+ ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
+ checkedSkiffParser.ValidateFinished();
+ }
+
+ // Parameterized fixture for small integer (and date/time) wire types.
+ // Parameter: (Skiff schema of the value, logical type of the column, value, expected Skiff bytes).
+ class TSkiffFormatSmallIntP
+     : public ::testing::TestWithParam<std::tuple<
+         std::shared_ptr<TSkiffSchema>,
+         TLogicalTypePtr,
+         TNamedValue::TValue,
+         TString
+     >>
+ {
+ public:
+     // Builds the list of test parameters.
+     static std::vector<ParamType> GetCases()
+     {
+         using namespace NLogicalTypeShortcuts;
+
+         std::vector<ParamType> result;
+
+         // Single value column: the expected data is two zero bytes followed by |skiffValue|.
+         auto addSimpleCase = [&result] (
+             EWireType wireType,
+             const TLogicalTypePtr& logicalType,
+             auto value,
+             TStringBuf skiffValue)
+         {
+             auto simpleSkiffSchema = CreateSimpleTypeSchema(wireType);
+             auto simpleSkiffData = TString(2, 0) + skiffValue;
+             result.emplace_back(simpleSkiffSchema, logicalType, value, simpleSkiffData);
+         };
+
+         // One-element list column: the expected data is three zero bytes, |skiffValue| and a trailing 0xff byte.
+         auto addListCase = [&result] (
+             EWireType wireType,
+             const TLogicalTypePtr& logicalType,
+             auto value,
+             TStringBuf skiffValue)
+         {
+             auto listSkiffSchema = CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(wireType)});
+             auto listSkiffData = TString(3, 0) + skiffValue + TString(1, '\xff');
+             auto listValue = TNamedValue::TValue{
+                 TNamedValue::TComposite{
+                     BuildYsonStringFluently()
+                         .BeginList()
+                             .Item().Value(value)
+                         .EndList().ToString()
+                 }
+             };
+             result.emplace_back(listSkiffSchema, List(logicalType), listValue, listSkiffData);
+         };
+
+         auto addSimpleAndListCases = [&] (
+             EWireType wireType,
+             const TLogicalTypePtr& logicalType,
+             auto value,
+             TStringBuf skiffValue)
+         {
+             addSimpleCase(wireType, logicalType, value, skiffValue);
+             addListCase(wireType, logicalType, value, skiffValue);
+         };
+
+         // Adds a Yson() case plus simple and list cases for every integer logical type
+         // of the same signedness as |value| whose range contains |value|.
+         auto addMultiCase = [&] (EWireType wireType, auto value, TStringBuf skiffValue) {
+             auto add = [&] (const TLogicalTypePtr& logicalType) {
+                 addSimpleAndListCases(wireType, logicalType, value, skiffValue);
+             };
+             addSimpleCase(wireType, Yson(), value, skiffValue);
+
+             using T = std::decay_t<decltype(value)>;
+             static_assert(std::is_integral_v<T>);
+             if constexpr (std::is_signed_v<T>) {
+                 if (std::numeric_limits<i8>::min() <= value && value <= std::numeric_limits<i8>::max()) {
+                     add(Int8());
+                 }
+                 if (std::numeric_limits<i16>::min() <= value && value <= std::numeric_limits<i16>::max()) {
+                     add(Int16());
+                 }
+                 if (std::numeric_limits<i32>::min() <= value && value <= std::numeric_limits<i32>::max()) {
+                     add(Int32());
+                 }
+                 add(Int64());
+             } else {
+                 if (value <= std::numeric_limits<ui8>::max()) {
+                     add(Uint8());
+                 }
+                 if (value <= std::numeric_limits<ui16>::max()) {
+                     add(Uint16());
+                 }
+                 if (value <= std::numeric_limits<ui32>::max()) {
+                     add(Uint32());
+                 }
+                 add(Uint64());
+             }
+         };
+         addMultiCase(EWireType::Int8, 0, TStringBuf("\x00"sv));
+         addMultiCase(EWireType::Int8, 42, TStringBuf("*"));
+         addMultiCase(EWireType::Int8, -42, TStringBuf("\xd6"sv));
+         addMultiCase(EWireType::Int8, 127, TStringBuf("\x7f"sv));
+         addMultiCase(EWireType::Int8, -128, TStringBuf("\x80"sv));
+
+         addMultiCase(EWireType::Int16, 0, TStringBuf("\x00\x00"sv));
+         addMultiCase(EWireType::Int16, 42, TStringBuf("\x2a\x00"sv));
+         addMultiCase(EWireType::Int16, -42, TStringBuf("\xd6\xff"sv));
+         addMultiCase(EWireType::Int16, 0x7fff, TStringBuf("\xff\x7f"sv));
+         addMultiCase(EWireType::Int16, -0x8000, TStringBuf("\x00\x80"sv));
+
+         addMultiCase(EWireType::Int32, 0, TStringBuf("\x00\x00\x00\x00"sv));
+         addMultiCase(EWireType::Int32, 42, TStringBuf("\x2a\x00\x00\x00"sv));
+         addMultiCase(EWireType::Int32, -42, TStringBuf("\xd6\xff\xff\xff"sv));
+         addMultiCase(EWireType::Int32, 0x7fffffff, TStringBuf("\xff\xff\xff\x7f"sv));
+         // The |ll| suffix keeps the literal signed even where |long| is 32 bits wide;
+         // with a plain |l| suffix the literal would be unsigned there and take the unsigned branch above.
+         addMultiCase(EWireType::Int32, -0x80000000ll, TStringBuf("\x00\x00\x00\x80"sv));
+
+         addMultiCase(EWireType::Uint8, 0ull, TStringBuf("\x00"sv));
+         addMultiCase(EWireType::Uint8, 42ull, TStringBuf("*"));
+         addMultiCase(EWireType::Uint8, 255ull, TStringBuf("\xff"sv));
+
+         addMultiCase(EWireType::Uint16, 0ull, TStringBuf("\x00\x00"sv));
+         addMultiCase(EWireType::Uint16, 42ull, TStringBuf("\x2a\x00"sv));
+         addMultiCase(EWireType::Uint16, 0xFFFFull, TStringBuf("\xff\xff"sv));
+
+         addMultiCase(EWireType::Uint32, 0ull, TStringBuf("\x00\x00\x00\x00"sv));
+         addMultiCase(EWireType::Uint32, 42ull, TStringBuf("\x2a\x00\x00\x00"sv));
+         addMultiCase(EWireType::Uint32, 0xFFFFFFFFull, TStringBuf("\xff\xff\xff\xff"sv));
+
+         // Date/time logical types: zero plus the bounds of the respective ranges.
+         addSimpleAndListCases(EWireType::Uint16, Date(), 0ull, TStringBuf("\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Uint16, Date(), 42ull, TStringBuf("\x2a\x00"sv));
+         addSimpleAndListCases(EWireType::Uint16, Date(), DateUpperBound - 1, TStringBuf("\x08\xc2"sv));
+
+         addSimpleAndListCases(EWireType::Uint32, Datetime(), 0ull, TStringBuf("\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Uint32, Datetime(), 42ull, TStringBuf("\x2a\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Uint32, Datetime(), DatetimeUpperBound - 1, TStringBuf("\x7f\xdd\xce\xff"sv));
+
+         addSimpleAndListCases(EWireType::Int64, Date32(), 0ll, TStringBuf("\x00\x00\x00\x00\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Date32(), Date32UpperBound - 1, TStringBuf("\x3f\x73\x2e\x03\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Date32(), Date32LowerBound, TStringBuf("\xbf\x8c\xd1\xfc\xff\xff\xff\xff"sv));
+
+         addSimpleAndListCases(EWireType::Int32, Date32(), 0ll, TStringBuf("\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int32, Date32(), Date32UpperBound - 1, TStringBuf("\x3f\x73\x2e\x03"sv));
+         addSimpleAndListCases(EWireType::Int32, Date32(), Date32LowerBound, TStringBuf("\xbf\x8c\xd1\xfc"sv));
+
+         addSimpleAndListCases(EWireType::Int64, Datetime64(), 0ll, TStringBuf("\x00\x00\x00\x00\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Datetime64(), Datetime64UpperBound - 1, TStringBuf("\xff\xdf\xf0\xbc\x31\x04\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Datetime64(), Datetime64LowerBound, TStringBuf("\x80\xce\x0d\x43\xce\xfb\xff\xff"sv));
+
+         addSimpleAndListCases(EWireType::Int64, Timestamp64(), 0ll, TStringBuf("\x00\x00\x00\x00\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Timestamp64(), Timestamp64UpperBound - 1, TStringBuf("\xff\xff\xf7\x75\x42\xf1\xff\x3f"sv));
+         addSimpleAndListCases(EWireType::Int64, Timestamp64(), Timestamp64LowerBound, TStringBuf("\x00\xa0\x30\x6c\xa9\x0e\x00\xc0"sv));
+
+         addSimpleAndListCases(EWireType::Int64, Interval64(), 0ll, TStringBuf("\x00\x00\x00\x00\x00\x00\x00\x00"sv));
+         addSimpleAndListCases(EWireType::Int64, Interval64(), Interval64UpperBound - 1, TStringBuf("\x00\x60\xc7\x09\x99\xe2\xff\x7f"sv));
+         addSimpleAndListCases(EWireType::Int64, Interval64(), -Interval64UpperBound + 1, TStringBuf("\x00\xa0\x38\xf6\x66\x1d\x00\x80"sv));
+
+         return result;
+     }
+
+     static const std::vector<ParamType> Cases;
+ };
+
+ // The case list is built once, when this static member is initialized.
+ const std::vector<TSkiffFormatSmallIntP::ParamType> TSkiffFormatSmallIntP::Cases = TSkiffFormatSmallIntP::GetCases();
+
+ // Instantiates the parameterized suite over every generated case.
+ INSTANTIATE_TEST_SUITE_P(
+ Cases,
+ TSkiffFormatSmallIntP,
+ ::testing::ValuesIn(TSkiffFormatSmallIntP::Cases));
+
+ // Writes the parameter value into a single "column" column, compares the produced bytes with
+ // the expected Skiff data and then parses the expected data back into the original value.
+ TEST_P(TSkiffFormatSmallIntP, Test)
+ {
+     const auto& [skiffValueSchema, logicalType, value, expectedSkiffData] = GetParam();
+
+     const auto nameTable = New<TNameTable>();
+
+     TStringStream actualSkiffData;
+     auto skiffTableSchema = CreateTupleSchema({
+         skiffValueSchema->SetName("column")
+     });
+     auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+         TColumnSchema("column", logicalType),
+     });
+
+     // Direction 1: table value -> Skiff bytes.
+     auto writer = CreateSkiffWriter(skiffTableSchema, nameTable, &actualSkiffData, {tableSchema});
+     Y_UNUSED(writer->Write({
+         MakeRow(nameTable, {{"column", value}})
+     }));
+     writer->Close()
+         .Get()
+         .ThrowOnError();
+     EXPECT_EQ(actualSkiffData.Str(), expectedSkiffData);
+
+     // Direction 2: Skiff bytes -> table value.
+     TCollectingValueConsumer rowCollector(nameTable);
+     auto parser = CreateParserForSkiff(skiffTableSchema, tableSchema, &rowCollector);
+     parser->Read(expectedSkiffData);
+     parser->Finish();
+     auto actualValue = rowCollector.GetRowValue(0, "column");
+
+     // The expected value is built under the same column name that was written and read back.
+     EXPECT_EQ(actualValue, TNamedValue("column", value).ToUnversionedValue(nameTable));
+ }
+
+ // Values just outside the range of a small integer wire type must be rejected by the writer.
+ // Every case uses the first value beyond the corresponding bound.
+ TEST(TSkiffWriter, TestBadSmallIntegers)
+ {
+     using namespace NLogicalTypeShortcuts;
+
+     // Writes a single value into a "column" column of the given wire and logical types
+     // and returns the produced bytes; writer errors surface as exceptions from Close().
+     auto writeSkiffValue = [] (
+         std::shared_ptr<TSkiffSchema>&& typeSchema,
+         TLogicalTypePtr logicalType,
+         TNamedValue::TValue value)
+     {
+         TStringStream result;
+         auto skiffSchema = CreateTupleSchema({
+             typeSchema->SetName("column")
+         });
+         auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+             TColumnSchema("column", std::move(logicalType)),
+         });
+         auto nameTable = New<TNameTable>();
+         auto writer = CreateSkiffWriter(skiffSchema, nameTable, &result, {tableSchema});
+         Y_UNUSED(writer->Write({
+             MakeRow(nameTable, {{"column", std::move(value)}})
+         }));
+         writer->Close()
+             .Get()
+             .ThrowOnError();
+         return result.Str();
+     };
+
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int8), Int64(), 128),
+         "is out of range for possible values");
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int8), Int64(), -129),
+         "is out of range for possible values");
+
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int16), Int64(), 0x8000),
+         "is out of range for possible values");
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int16), Int64(), -0x8001),
+         "is out of range for possible values");
+
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int32), Int64(), 0x80000000ll),
+         "is out of range for possible values");
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Int32), Int64(), -0x80000001ll),
+         "is out of range for possible values");
+
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Uint8), Uint64(), 256ull),
+         "is out of range for possible values");
+
+     // 0x10000 is the first value that does not fit into 16 bits.
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Uint16), Uint64(), 0x10000ull),
+         "is out of range for possible values");
+
+     EXPECT_THROW_WITH_SUBSTRING(
+         writeSkiffValue(CreateSimpleTypeSchema(EWireType::Uint32), Uint64(), 0x100000000ull),
+         "is out of range for possible values");
+ }
+
+ // Parameterized fixture for Uuid columns.
+ // Parameter: (name table, table schema, Skiff schema, rows to write, expected Skiff bytes).
+ class TSkiffFormatUuidTestP : public ::testing::TestWithParam<std::tuple<
+ TNameTablePtr,
+ TTableSchemaPtr,
+ std::shared_ptr<TSkiffSchema>,
+ std::vector<TUnversionedOwningRow>,
+ TString
+ >>
+ {
+ public:
+ // Builds every combination of required/optional table schema with
+ // required/optional Uint128 and String32 Skiff schemas.
+ static std::vector<ParamType> GetCases()
+ {
+ using namespace NLogicalTypeShortcuts;
+
+ auto nameTable = New<TNameTable>();
+ // The same 16 bytes in two layouts: uint128UuidValue is stringUuidValue with the byte order reversed.
+ const auto stringUuidValue = TStringBuf("\xee\x1f\x37\x70" "\xb9\x93\x64\xb5" "\xe4\xdf\xe9\x03" "\x67\x5c\x30\x62");
+ const auto uint128UuidValue = TStringBuf("\x62\x30\x5c\x67" "\x03\xe9\xdf\xe4" "\xb5\x64\x93\xb9" "\x70\x37\x1f\xee");
+
+ const auto requiredTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{TColumnSchema("uuid", Uuid())});
+ const auto optionalTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{TColumnSchema("uuid", Optional(Uuid()))});
+
+ const auto optionalUint128SkiffSchema = CreateTupleSchema({
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Uint128),
+ })->SetName("uuid"),
+ });
+
+ const auto requiredUint128SkiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Uint128)->SetName("uuid"),
+ });
+
+ const auto optionalStringSkiffSchema = CreateTupleSchema({
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::String32),
+ })->SetName("uuid"),
+ });
+
+ const auto requiredStringSkiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("uuid"),
+ });
+
+ std::vector<ParamType> result;
+
+ // Uint128 wire type: expected data is two zero bytes, then (for optional Skiff schemas) a "\1" byte,
+ // then the byte-reversed uuid.
+ result.emplace_back(
+ nameTable,
+ requiredTableSchema,
+ requiredUint128SkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + uint128UuidValue);
+
+ result.emplace_back(
+ nameTable,
+ optionalTableSchema,
+ requiredUint128SkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + uint128UuidValue);
+
+ result.emplace_back(
+ nameTable,
+ requiredTableSchema,
+ optionalUint128SkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + "\1" + uint128UuidValue);
+
+ result.emplace_back(
+ nameTable,
+ optionalTableSchema,
+ optionalUint128SkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + "\1" + uint128UuidValue);
+
+ // String32 wire type: the uuid bytes are kept as is and preceded by this four-byte value (0x10 = 16).
+ const TString uuidLen = TString(TStringBuf("\x10\x00\x00\x00"sv));
+
+ result.emplace_back(
+ nameTable,
+ requiredTableSchema,
+ requiredStringSkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + uuidLen + stringUuidValue);
+
+ result.emplace_back(
+ nameTable,
+ optionalTableSchema,
+ requiredStringSkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + uuidLen + stringUuidValue);
+
+ result.emplace_back(
+ nameTable,
+ requiredTableSchema,
+ optionalStringSkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + "\1" + uuidLen + stringUuidValue);
+
+ result.emplace_back(
+ nameTable,
+ optionalTableSchema,
+ optionalStringSkiffSchema,
+ std::vector<TUnversionedOwningRow>{
+ MakeRow(nameTable, {{"uuid", stringUuidValue}}),
+ },
+ TString(2, '\0') + "\1" + uuidLen + stringUuidValue);
+
+ return result;
+ }
+
+ static const std::vector<ParamType> Cases;
+ };
+
+ // The case list is built once, when this static member is initialized.
+ const std::vector<TSkiffFormatUuidTestP::ParamType> TSkiffFormatUuidTestP::Cases = TSkiffFormatUuidTestP::GetCases();
+
+ // Instantiates the parameterized suite over every generated case.
+ INSTANTIATE_TEST_SUITE_P(
+ Cases,
+ TSkiffFormatUuidTestP,
+ ::testing::ValuesIn(TSkiffFormatUuidTestP::Cases));
+
+ // Writes the parameter rows with the Skiff writer, compares the output with the expected bytes
+ // and then parses the output back, expecting the original rows.
+ TEST_P(TSkiffFormatUuidTestP, Test)
+ {
+     const auto& [nameTable, tableSchema, skiffSchema, rows, skiffString] = GetParam();
+
+     // The writer consumes non-owning rows, so views over the owning rows are collected first.
+     std::vector<TUnversionedRow> rowViews;
+     for (const auto& owningRow : rows) {
+         rowViews.emplace_back(owningRow);
+     }
+
+     TStringStream output;
+     auto writer = CreateSkiffWriter(skiffSchema, nameTable, &output, {tableSchema});
+     Y_UNUSED(writer->Write(TRange(rowViews)));
+     writer->Close().Get().ThrowOnError();
+     ASSERT_EQ(output.Str(), skiffString);
+
+     TCollectingValueConsumer collector(nameTable);
+     auto parser = CreateParserForSkiff(skiffSchema, tableSchema, &collector);
+     parser->Read(output.Str());
+     parser->Finish();
+     ASSERT_EQ(collector.GetRowList(), rows);
+ }
+
+// A null value in an optional uuid table column cannot be written into a
+// required Uint128 Skiff column; the writer must report "Unexpected type".
+TEST(TSkiffFormatUuidTest, TestError)
+{
+    using namespace NLogicalTypeShortcuts;
+
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Uint128)->SetName("uuid"),
+    });
+    auto tableSchema = New<TTableSchema>(
+        std::vector<TColumnSchema>{TColumnSchema("uuid", Optional(Uuid()))});
+    auto nameTable = New<TNameTable>();
+
+    TStringStream output;
+    auto writer = CreateSkiffWriter(skiffSchema, nameTable, &output, {tableSchema});
+    Y_UNUSED(writer->Write({
+        MakeRow(nameTable, {{"uuid", nullptr}}),
+    }));
+
+    // The failure is observed when the writer is closed.
+    EXPECT_THROW_WITH_SUBSTRING(
+        writer->Close().Get().ThrowOnError(),
+        "Unexpected type");
+}
+
+// Fixture parameterized by the singular logical type under test.
+// TestWithParam<T> is the gtest shorthand for deriving from both
+// ::testing::Test and ::testing::WithParamInterface<T>.
+class TSkiffWriterSingular
+    : public ::testing::TestWithParam<ESimpleLogicalValueType>
+{};
+
+// Run the fixture's tests for both singular types: Null and Void.
+INSTANTIATE_TEST_SUITE_P(
+    Singular,
+    TSkiffWriterSingular,
+    ::testing::Values(
+        ESimpleLogicalValueType::Null,
+        ESimpleLogicalValueType::Void));
+
+// For an optional singular (Null/Void) column the writer must emit variant8 tag 0
+// for a null value and tag 1 for the composite value "[#]".
+TEST_P(TSkiffWriterSingular, TestOptionalSingular)
+{
+    const auto singularType = GetParam();
+
+    auto nameTable = New<TNameTable>();
+    const std::vector<TTableSchemaPtr> tableSchemas = {
+        New<TTableSchema>(std::vector{
+            TColumnSchema("opt_null", OptionalLogicalType(SimpleLogicalType(singularType))),
+        }),
+    };
+
+    // Both variant alternatives are Nothing, so only the tag byte is written.
+    auto skiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Nothing),
+        })->SetName("opt_null"),
+    });
+
+    TString serialized;
+    {
+        TStringOutput output(serialized);
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, tableSchemas);
+
+        // Row 0: null value.
+        const bool ready = skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"opt_null", nullptr},
+            }).Get(),
+        });
+        if (!ready) {
+            skiffWriter->GetReadyEvent().Get().ThrowOnError();
+        }
+
+        // Row 1: composite value "[#]".
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"opt_null", EValueType::Composite, "[#]"},
+            }).Get(),
+        }));
+
+        skiffWriter->Close().Get().ThrowOnError();
+    }
+
+    TStringInput input(serialized);
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+
+    // Row 1.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// Values may appear in any order inside a row; the writer must still emit them
+// in the order of the Skiff schema (number, eng, rus).
+TEST(TSkiffWriter, TestRearrange)
+{
+    // Optional string column: variant8 over <Nothing, String32>.
+    const auto makeOptionalString = [] {
+        return CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::String32),
+        });
+    };
+
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
+        makeOptionalString()->SetName("eng"),
+        makeOptionalString()->SetName("rus"),
+    });
+
+    auto nameTable = New<TNameTable>();
+    TString serialized;
+    {
+        TStringOutput output(serialized);
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()});
+
+        // Writes one row and waits for the writer if it reports it is not ready.
+        const auto writeRow = [&] (TUnversionedRow row) {
+            if (!skiffWriter->Write({row})) {
+                skiffWriter->GetReadyEvent().Get().ThrowOnError();
+            }
+        };
+
+        // Row 0: schema order.
+        writeRow(MakeRow(nameTable, {
+            {TString(TableIndexColumnName), 0},
+            {"number", 1},
+            {"eng", "one"},
+            {"rus", nullptr},
+        }).Get());
+
+        // Row 1: "eng" precedes "number".
+        writeRow(MakeRow(nameTable, {
+            {TString(TableIndexColumnName), 0},
+            {"eng", nullptr},
+            {"number", 2},
+            {"rus", "dva"},
+        }).Get());
+
+        // Row 2: fully reversed order.
+        writeRow(MakeRow(nameTable, {
+            {TString(TableIndexColumnName), 0},
+            {"rus", "tri"},
+            {"eng", "three"},
+            {"number", 3},
+        }).Get());
+
+        skiffWriter->Close().Get().ThrowOnError();
+    }
+
+    TStringInput input(serialized);
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 1);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseString32(), "one");
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+
+    // Row 1.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 2);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseString32(), "dva");
+
+    // Row 2.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 3);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseString32(), "three");
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseString32(), "tri");
+
+    // End of data.
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// A row that omits the required "eng" column must make the writer fail
+// with an error that names the offending column.
+TEST(TSkiffWriter, TestMissingRequiredField)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("eng"),
+    });
+    auto nameTable = New<TNameTable>();
+    TString serialized;
+
+    try {
+        TStringOutput output(serialized);
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()});
+
+        // Only "number" is supplied; "eng" is absent.
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"number", 1},
+            }).Get()
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        // Reaching this point means no exception was thrown.
+        ADD_FAILURE();
+    } catch (const std::exception& ex) {
+        EXPECT_THAT(ex.what(), testing::HasSubstr("Unexpected type of \"eng\" column"));
+    }
+}
+
+// Checks the "$sparse_columns" encoding: each present value is written as
+// <variant16 tag, value> in the order it appears in the row, null values are
+// omitted, and every row is terminated by the end-of-sequence tag.
+TEST(TSkiffWriter, TestSparse)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateRepeatedVariant16Schema({
+            CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
+            CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
+            CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
+        })->SetName("$sparse_columns"),
+    });
+
+    auto nameTable = New<TNameTable>();
+    TString serialized;
+    TStringOutput output(serialized);
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()});
+
+    // Writes one row and waits for the writer if it reports it is not ready.
+    const auto writeRow = [&] (TUnversionedRow row) {
+        if (!skiffWriter->Write({row})) {
+            skiffWriter->GetReadyEvent().Get().ThrowOnError();
+        }
+    };
+
+    // Row 0: two sparse values in schema order.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"int64", -1},
+        {"string32", "minus one"},
+    }).Get());
+
+    // Row 1: the same columns in the opposite order.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"string32", "minus five"},
+        {"int64", -5},
+    }).Get());
+
+    // Row 2: a single value.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"uint64", 42u},
+    }).Get());
+
+    // Row 3: explicit nulls next to one real value.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"int64", -8},
+        {"uint64", nullptr},
+        {"string32", nullptr},
+    }).Get());
+
+    // Row 4: no sparse values at all.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(serialized);
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), -1);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 2);
+    ASSERT_EQ(parser.ParseString32(), "minus one");
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    // Row 1.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 2);
+    ASSERT_EQ(parser.ParseString32(), "minus five");
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), -5);
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    // Row 2.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 1);
+    ASSERT_EQ(parser.ParseUint64(), 42u);
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    // Row 3: only the non-null value is present.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), -8);
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    // Row 4: just the terminator.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    // End of data.
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// A column that the Skiff schema does not describe must be rejected, regardless
+// of whether its name was registered before or after the writer was created.
+TEST(TSkiffWriter, TestMissingFields)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+    });
+
+    // Case 1: "unknown_column" is not registered before the writer is created.
+    try {
+        TStringStream output;
+        auto nameTable = New<TNameTable>();
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()});
+
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"unknown_column", "four"},
+            }).Get(),
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        // Reaching this point means no exception was thrown.
+        ADD_FAILURE();
+    } catch (const std::exception& ex) {
+        EXPECT_THAT(ex.what(), testing::HasSubstr("Column \"unknown_column\" is not described by Skiff schema"));
+    }
+
+    // Case 2: "unknown_column" is registered up front, so its id is smaller
+    // than the id of the schema column "value" (asserted below).
+    try {
+        TStringStream output;
+        auto nameTable = New<TNameTable>();
+        const auto unknownId = nameTable->RegisterName("unknown_column");
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{New<TTableSchema>()});
+
+        ASSERT_TRUE(unknownId < nameTable->GetId("value"));
+
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(TableIndexColumnName), 0},
+                {"unknown_column", "four"},
+            }).Get(),
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        // Reaching this point means no exception was thrown.
+        ADD_FAILURE();
+    } catch (const std::exception& ex) {
+        EXPECT_THAT(ex.what(), testing::HasSubstr("Column \"unknown_column\" is not described by Skiff schema"));
+    }
+}
+
+// Columns that are not listed in the Skiff schema must be collected into the
+// "$other_columns" Yson32 field as a YSON map.
+TEST(TSkiffWriter, TestOtherColumns)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64)
+        })->SetName("int64_column"),
+        CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    // One extra column is registered before the writer is created; the other one
+    // ("other_string_column") is first mentioned when its row is built.
+    nameTable->RegisterName("string_column");
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()});
+
+    // Writes one row and waits for the writer if it reports it is not ready.
+    const auto writeRow = [&] (TUnversionedRow row) {
+        if (!skiffWriter->Write({row})) {
+            skiffWriter->GetReadyEvent().Get().ThrowOnError();
+        }
+    };
+
+    // Row 0: only an extra column.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"string_column", "foo"},
+    }).Get());
+
+    // Row 1: only the schema column.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"int64_column", 42},
+    }).Get());
+
+    // Row 2: an extra column that was not registered up front.
+    writeRow(MakeRow(nameTable, {
+        {TString(TableIndexColumnName), 0},
+        {"other_string_column", "bar"},
+    }).Get());
+
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(output.Str());
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Reads the next Yson32 field and normalizes it to stable text YSON.
+    const auto nextYsonText = [&parser] {
+        auto yson = TString{parser.ParseYson32()};
+        return ConvertToYsonTextStringStable(ConvertToNode(TYsonString(yson)));
+    };
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+    ASSERT_EQ(nextYsonText(), "{\"string_column\"=\"foo\";}");
+
+    // Row 1.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseInt64(), 42);
+    ASSERT_EQ(nextYsonText(), "{}");
+
+    // Row 2.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+    ASSERT_EQ(nextYsonText(), "{\"other_string_column\"=\"bar\";}");
+
+    // End of data.
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// Checks the "$key_switch" system column: the flag must be false while the key
+// stays the same ("one", "one") and true on the first row with a new key ("two").
+TEST(TSkiffWriter, TestKeySwitch)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+        CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
+    });
+
+    TStringStream resultStream;
+    auto nameTable = New<TNameTable>();
+    // NOTE(review): the trailing `1` is presumably the key column count - confirm
+    // against the CreateSkiffWriter test helper.
+    auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1);
+
+    // Writes one row and waits for the writer if it reports it is not ready.
+    auto write = [&] (TUnversionedRow row) {
+        if (!writer->Write({row})) {
+            writer->GetReadyEvent().Get().ThrowOnError();
+        }
+    };
+
+    // Row 0.
+    write(MakeRow(nameTable, {
+        {"value", "one"},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    // Row 1.
+    write(MakeRow(nameTable, {
+        {"value", "one"},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    // Row 2.
+    write(MakeRow(nameTable, {
+        {"value", "two"},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput resultInput(resultStream.Str());
+    TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
+
+    // row 0
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+
+    // row 1
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+
+    // row 2: the key changed, so the switch flag is set
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "two");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
+
+    // end
+    ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
+    checkedSkiffParser.ValidateFinished();
+}
+
+// With the end-of-stream option enabled the writer must append a 0xffff
+// variant16 tag after the last row.
+TEST(TSkiffWriter, TestEndOfStream)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+    });
+
+    TStringStream resultStream;
+    auto nameTable = New<TNameTable>();
+    // NOTE(review): the trailing `1, true` are presumably the key column count and
+    // the "enable end of stream" flag - confirm against the CreateSkiffWriter helper.
+    auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1, true);
+
+    // Writes one row and waits for the writer if it reports it is not ready.
+    auto write = [&] (TUnversionedRow row) {
+        if (!writer->Write({row})) {
+            writer->GetReadyEvent().Get().ThrowOnError();
+        }
+    };
+
+    // Row 0.
+    write(MakeRow(nameTable, {
+        {"value", "zero"},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    // Row 1.
+    write(MakeRow(nameTable, {
+        {"value", "one"},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput resultInput(resultStream.Str());
+    TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
+
+    // Row 0.
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "zero");
+
+    // Row 1.
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
+
+    // End of stream.
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0xffff);
+
+    // The End.
+    ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
+    checkedSkiffParser.ValidateFinished();
+}
+
+// Pins the exact bytes produced for the "$range_index" / "$row_index" system columns.
+// As the expectations below show, each row starts with the variant16 table index
+// ("0000") followed by one variant8 per index column:
+//   "01" + 8-byte value  - the index is written explicitly;
+//   "00"                 - the index is not written (the range is unchanged, or the
+//                          row index follows the previous one);
+//   "02"                 - the index is missing (three-alternative schema only).
+TEST(TSkiffWriter, TestRowRangeIndex)
+{
+    // Two-alternative schema: a missing index is an error.
+    const auto strictSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+        })->SetName("$range_index"),
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+        })->SetName("$row_index"),
+    });
+
+    // Compact description of an input row; absent optionals mean "index not supplied".
+    struct TRow {
+        int TableIndex;
+        std::optional<int> RangeIndex;
+        std::optional<int> RowIndex;
+    };
+
+    // Builds an unversioned row holding only the system columns described by `row`.
+    auto makeRow = [] (const TRow& row, const TNameTablePtr& nameTable) {
+        std::vector<TNamedValue> namedValues = {
+            {TString(TableIndexColumnName), row.TableIndex},
+        };
+        if (row.RangeIndex) {
+            namedValues.emplace_back(TString(RangeIndexColumnName), *row.RangeIndex);
+        }
+        if (row.RowIndex) {
+            namedValues.emplace_back(TString(RowIndexColumnName), *row.RowIndex);
+        }
+        return MakeRow(nameTable, namedValues);
+    };
+
+    // Serializes `rows` with `skiffSchema` and returns the output hex-encoded.
+    auto encode = [makeRow] (const std::vector<TRow>& rows, const std::shared_ptr<TSkiffSchema>& skiffSchema) {
+        // One (empty) table schema per distinct table index seen in the input.
+        THashSet<int> seenTableIndices;
+        for (const auto& row : rows) {
+            seenTableIndices.insert(row.TableIndex);
+        }
+        std::vector<TTableSchemaPtr> tableSchemas(seenTableIndices.size(), New<TTableSchema>());
+
+        TStringStream output;
+        auto nameTable = New<TNameTable>();
+        auto skiffWriter = CreateSkiffWriter(
+            skiffSchema,
+            nameTable,
+            &output,
+            tableSchemas);
+
+        for (const auto& row : rows) {
+            if (!skiffWriter->Write({makeRow(row, nameTable)})) {
+                skiffWriter->GetReadyEvent().Get().ThrowOnError();
+            }
+        }
+        skiffWriter->Close().Get().ThrowOnError();
+
+        return HexEncode(output.Str());
+    };
+
+    // Consecutive row indices within one range.
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 0, 2},
+        }, strictSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "00" "00");
+
+    // A gap in row indices forces the row index to be written again.
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 0, 3},
+        }, strictSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "00" "01""03000000""00000000");
+
+    // A range switch writes both indices, even though the row index is consecutive.
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 1, 2},
+            {0, 1, 3},
+        }, strictSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "01""01000000""00000000" "01""02000000""00000000"
+        "0000" "00" "00");
+
+    // With the strict schema a missing index is reported as an error.
+    EXPECT_THROW_WITH_SUBSTRING(encode({{0, 0, {}}}, strictSchema), "index requested but reader did not return it");
+    EXPECT_THROW_WITH_SUBSTRING(encode({{0, {}, 0}}, strictSchema), "index requested but reader did not return it");
+
+    // Three-alternative schema: the extra Nothing alternative (tag 2) stands for
+    // a missing index.
+    const auto lenientSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+            CreateSimpleTypeSchema(EWireType::Nothing),
+        })->SetName("$range_index"),
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+            CreateSimpleTypeSchema(EWireType::Nothing),
+        })->SetName("$row_index"),
+    });
+
+    // The first three cases repeat the strict-schema inputs with identical output.
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 0, 2},
+        }, lenientSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "00" "00");
+
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 0, 3},
+        }, lenientSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "00" "01""03000000""00000000");
+
+    EXPECT_STREQ(
+        encode({
+            {0, 0, 0},
+            {0, 0, 1},
+            {0, 1, 2},
+            {0, 1, 3},
+        }, lenientSchema).data(),
+
+        "0000" "01""00000000""00000000" "01""00000000""00000000"
+        "0000" "00" "00"
+        "0000" "01""01000000""00000000" "01""02000000""00000000"
+        "0000" "00" "00");
+
+    // Both indices missing.
+    EXPECT_STREQ(
+        encode({
+            {0, {}, {}},
+            {0, {}, {}},
+            {0, {}, {}},
+            {0, {}, {}},
+        }, lenientSchema).data(),
+
+        "0000" "02" "02"
+        "0000" "02" "02"
+        "0000" "02" "02"
+        "0000" "02" "02");
+
+    // Only the range index missing.
+    EXPECT_STREQ(
+        encode({
+            {0, {}, 0},
+            {0, {}, 1},
+            {0, {}, 3},
+            {0, {}, 4},
+        }, lenientSchema).data(),
+
+        "0000" "02" "01""00000000""00000000"
+        "0000" "02" "00"
+        "0000" "02" "01""03000000""00000000"
+        "0000" "02" "00");
+
+    // Only the row index missing.
+    EXPECT_STREQ(
+        encode({
+            {0, 0, {}},
+            {0, 0, {}},
+            {0, 1, {}},
+            {0, 1, {}},
+        }, lenientSchema).data(),
+
+        "0000" "01""00000000""00000000" "02"
+        "0000" "00" "02"
+        "0000" "01""01000000""00000000" "02"
+        "0000" "00" "02");
+}
+
+// The writer must work when the Skiff schema requests only the row index or
+// only the range index; each system column is tested on its own.
+TEST(TSkiffWriter, TestRowIndexOnlyOrRangeIndexOnly)
+{
+    std::string systemColumns[] = {
+        RowIndexColumnName,
+        RangeIndexColumnName,
+    };
+
+    for (const auto& systemColumn : systemColumns) {
+        const auto skiffColumnName = TString(systemColumn);
+
+        auto skiffSchema = CreateTupleSchema({
+            CreateVariant8Schema({
+                CreateSimpleTypeSchema(EWireType::Nothing),
+                CreateSimpleTypeSchema(EWireType::Int64),
+            })->SetName(skiffColumnName),
+        });
+
+        TStringStream output;
+        auto nameTable = New<TNameTable>();
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {New<TTableSchema>()}, 1);
+
+        // Single row holding just the index value 0.
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(systemColumn), 0},
+            }).Get(),
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        TStringInput input(output.Str());
+        TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+        // Row 0: the index is written explicitly (tag 1 + value).
+        ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 0);
+
+        ASSERT_EQ(parser.HasMoreData(), false);
+        parser.ValidateFinished();
+    }
+}
+
+// A composite value (struct with a string and a list of point structs) must be
+// written field by field following the nested Skiff schema.
+TEST(TSkiffWriter, TestComplexType)
+{
+    auto pointSkiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("x"),
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("y"),
+    });
+    auto skiffSchema = CreateTupleSchema({
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+            CreateRepeatedVariant8Schema({
+                pointSkiffSchema
+            })->SetName("points")
+        })->SetName("value"),
+    });
+
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("value", StructLogicalType({
+            {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            {
+                "points",
+                ListLogicalType(
+                    StructLogicalType({
+                        {"x", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+                        {"y", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+                    }))
+            }
+        })),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{tableSchema});
+
+    // Row 0: name "foo" with points (0, 1) and (2, 3).
+    Y_UNUSED(skiffWriter->Write({
+        MakeRow(nameTable, {
+            {"value", EValueType::Composite, "[foo;[[0; 1];[2;3]]]"},
+            {TString(TableIndexColumnName), 0},
+        }).Get(),
+    }));
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(output.Str());
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseString32(), "foo");
+    // First point.
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 1);
+    // Second point.
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+    ASSERT_EQ(parser.ParseInt64(), 2);
+    ASSERT_EQ(parser.ParseInt64(), 3);
+    // End of the list.
+    ASSERT_EQ(parser.ParseVariant8Tag(), EndOfSequenceTag<ui8>());
+
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// A null value in an optional struct column must be written as variant8 tag 0
+// with no payload.
+TEST(TSkiffWriter, TestEmptyComplexType)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateTupleSchema({
+                CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+                CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+            })
+        })->SetName("value"),
+    });
+
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("value", OptionalLogicalType(
+            StructLogicalType({
+                {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
+                {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            }))),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{tableSchema});
+
+    // Row 0: the struct is absent.
+    Y_UNUSED(skiffWriter->Write({
+        MakeRow(nameTable, {
+            {"value", nullptr},
+            {TString(TableIndexColumnName), 0},
+        }).Get(),
+    }));
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(output.Str());
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0: only the "nothing" tag.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
+
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// An optional struct column placed in "$sparse_columns" with a non-optional Skiff
+// type must be written as <sparse tag, fields...> followed by the terminator.
+TEST(TSkiffWriter, TestSparseComplexType)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateRepeatedVariant16Schema({
+            CreateTupleSchema({
+                CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+                CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+            })->SetName("value"),
+        })->SetName("$sparse_columns"),
+    });
+
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("value", OptionalLogicalType(
+            StructLogicalType({
+                {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
+                {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            }))),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{tableSchema});
+
+    // Row 0: struct ("foo", "bar").
+    Y_UNUSED(skiffWriter->Write({
+        MakeRow(nameTable, {
+            {"value", EValueType::Composite, "[foo;bar;]"},
+            {TString(TableIndexColumnName), 0},
+        }).Get(),
+    }));
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(output.Str());
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseString32(), "foo");
+    ASSERT_EQ(parser.ParseString32(), "bar");
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// Same as the sparse struct case, but the Skiff type inside "$sparse_columns" is
+// itself optional, so the value is preceded by an extra variant8 tag (1 = present).
+TEST(TSkiffWriter, TestSparseComplexTypeWithExtraOptional)
+{
+    auto structSkiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+    });
+    auto skiffSchema = CreateTupleSchema({
+        CreateRepeatedVariant16Schema({
+            CreateVariant8Schema({
+                CreateSimpleTypeSchema(EWireType::Nothing),
+                structSkiffSchema
+            })->SetName("value"),
+        })->SetName("$sparse_columns"),
+    });
+
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("value", OptionalLogicalType(
+            StructLogicalType({
+                {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
+                {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
+            }))),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{tableSchema});
+
+    // Row 0: struct ("foo", "bar").
+    Y_UNUSED(skiffWriter->Write({
+        MakeRow(nameTable, {
+            {"value", EValueType::Composite, "[foo;bar;]"},
+            {TString(TableIndexColumnName), 0},
+        }).Get(),
+    }));
+    skiffWriter->Close().Get().ThrowOnError();
+
+    TStringInput input(output.Str());
+    TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+    // Row 0.
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+    ASSERT_EQ(parser.ParseString32(), "foo");
+    ASSERT_EQ(parser.ParseString32(), "bar");
+    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
+
+    ASSERT_EQ(parser.HasMoreData(), false);
+    parser.ValidateFinished();
+}
+
+// A variant8 wrapping another variant8 is not a valid Skiff type for a column
+// of a table with an empty schema; writer construction must fail.
+TEST(TSkiffWriter, TestBadWireTypeForSimpleColumn)
+{
+    auto optionalYson = CreateVariant8Schema({
+        CreateSimpleTypeSchema(EWireType::Nothing),
+        CreateSimpleTypeSchema(EWireType::Yson32),
+    });
+    auto skiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            optionalYson
+        })->SetName("opt_yson32"),
+    });
+
+    TStringStream output;
+    auto nameTable = New<TNameTable>();
+    EXPECT_THROW_WITH_SUBSTRING(
+        CreateSkiffWriter(skiffSchema, nameTable, &output, std::vector{New<TTableSchema>()}),
+        "cannot be represented with Skiff schema");
+}
+
+// A list-typed Skiff column that is absent from the table schema is accepted only
+// when it is optional; missing and null values are then both written as tag 0.
+TEST(TSkiffWriter, TestMissingComplexColumn)
+{
+    auto requiredSkiffSchema = CreateTupleSchema({
+        CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Int64)})->SetName("opt_list"),
+    });
+    auto optionalSkiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Int64)}),
+        })->SetName("opt_list"),
+    });
+
+    // Required Skiff type: the writer cannot even be constructed.
+    {
+        auto nameTable = New<TNameTable>();
+        EXPECT_THROW_WITH_SUBSTRING(
+            CreateSkiffWriter(requiredSkiffSchema, nameTable, &Cnull, std::vector{New<TTableSchema>()}),
+            "cannot be represented with Skiff schema");
+    }
+
+    // Optional Skiff type: three rows, each encoded as table index + "nothing" tag.
+    {
+        auto nameTable = New<TNameTable>();
+        TStringStream output;
+        auto skiffWriter = CreateSkiffWriter(optionalSkiffSchema, nameTable, &output, std::vector{New<TTableSchema>()});
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, { }).Get(),
+            MakeRow(nameTable, {
+                {"opt_list", nullptr},
+            }).Get(),
+            MakeRow(nameTable, { }).Get(),
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        EXPECT_EQ(HexEncode(output.Str()), "0000" "00" "0000" "00" "0000" "00");
+    }
+}
+
+// A column declared with wire type Nothing ("string") must be skipped in the
+// output, whether or not the row carries a value for it; the neighbouring
+// regular and system columns must still be written correctly.
+TEST(TSkiffWriter, TestSkippedFields)
+{
+    // Optional Int64 used for both system index columns.
+    const auto makeOptionalInt64 = [] {
+        return CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+        });
+    };
+
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
+        CreateSimpleTypeSchema(EWireType::Nothing)->SetName("string"),
+        makeOptionalInt64()->SetName(TString(RangeIndexColumnName)),
+        makeOptionalInt64()->SetName(TString(RowIndexColumnName)),
+        CreateSimpleTypeSchema(EWireType::Double)->SetName("double"),
+    });
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("number", EValueType::Int64),
+        TColumnSchema("string", EValueType::String),
+        TColumnSchema("double", EValueType::Double),
+    });
+
+    auto nameTable = New<TNameTable>();
+    TString serialized;
+    {
+        TStringOutput output(serialized);
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {tableSchema});
+
+        // Row 0: the skipped column has a value.
+        const bool ready = skiffWriter->Write({
+            MakeRow(nameTable, {
+                {"number", 1},
+                {"string", "hello"},
+                {TString(RangeIndexColumnName), 0},
+                {TString(RowIndexColumnName), 0},
+                {"double", 1.5},
+            }).Get()
+        });
+        if (!ready) {
+            skiffWriter->GetReadyEvent().Get().ThrowOnError();
+        }
+
+        // Row 1: the skipped column is absent.
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {"number", 1},
+                {TString(RangeIndexColumnName), 5},
+                {TString(RowIndexColumnName), 1},
+                {"double", 2.5},
+            }).Get()
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        TStringInput input(serialized);
+        TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+        // Row 0.
+        ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+        ASSERT_EQ(parser.ParseInt64(), 1);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 0);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 0);
+        ASSERT_EQ(parser.ParseDouble(), 1.5);
+
+        // Row 1.
+        ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+        ASSERT_EQ(parser.ParseInt64(), 1);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 5);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 1);
+        ASSERT_EQ(parser.ParseDouble(), 2.5);
+
+        ASSERT_EQ(parser.HasMoreData(), false);
+        parser.ValidateFinished();
+    }
+}
+
+// The skipped (Nothing-typed) column is the only table column here, and the
+// second row carries just a system column; the range index must still be
+// written correctly for both rows.
+TEST(TSkiffWriter, TestSkippedFieldsOutOfRange)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Nothing)->SetName("string"),
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Int64),
+        })->SetName(TString(RangeIndexColumnName)),
+    });
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("string", EValueType::String),
+    });
+
+    auto nameTable = New<TNameTable>();
+    TString serialized;
+    {
+        TStringOutput output(serialized);
+        auto skiffWriter = CreateSkiffWriter(skiffSchema, nameTable, &output, {tableSchema});
+
+        // Row 0: skipped column plus range index 0.
+        const bool ready = skiffWriter->Write({
+            MakeRow(nameTable, {
+                {"string", "hello"},
+                {TString(RangeIndexColumnName), 0},
+            }).Get()
+        });
+        if (!ready) {
+            skiffWriter->GetReadyEvent().Get().ThrowOnError();
+        }
+
+        // Row 1: range index 5 only.
+        Y_UNUSED(skiffWriter->Write({
+            MakeRow(nameTable, {
+                {TString(RangeIndexColumnName), 5},
+            }).Get()
+        }));
+        skiffWriter->Close().Get().ThrowOnError();
+
+        TStringInput input(serialized);
+        TCheckedSkiffParser parser(CreateVariant16Schema({skiffSchema}), &input);
+
+        // Row 0.
+        ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 0);
+
+        // Row 1.
+        ASSERT_EQ(parser.ParseVariant16Tag(), 0);
+        ASSERT_EQ(parser.ParseVariant8Tag(), 1);
+        ASSERT_EQ(parser.ParseInt64(), 5);
+
+        ASSERT_EQ(parser.HasMoreData(), false);
+        parser.ValidateFinished();
+    }
+}
+
+// Serializes three rows through a Skiff writer whose schema contains a skipped
+// (wire type Nothing) column and a "$key_switch" column, then verifies the stream:
+// the key switch flag is expected to be set only on the row where "value" changes.
+TEST(TSkiffWriter, TestSkippedFieldsAndKeySwitch)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
+        CreateSimpleTypeSchema(EWireType::Nothing)->SetName("skipped"),
+        CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
+        CreateSimpleTypeSchema(EWireType::Int64)->SetName("value1"),
+    });
+    TStringStream resultStream;
+    auto nameTable = New<TNameTable>();
+    // NOTE(review): the trailing `1` is presumably the key column count used to
+    // detect key switches — confirm against CreateSkiffWriter.
+    auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1);
+
+    // Writes a single row; if Write() returns false, waits on the ready event.
+    auto write = [&] (TUnversionedRow row) {
+        if (!writer->Write({row})) {
+            writer->GetReadyEvent().Get().ThrowOnError();
+        }
+    };
+
+    // Row 0.
+    write(MakeRow(nameTable, {
+        {"value", "one"},
+        {"value1", 0},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    // Row 1.
+    write(MakeRow(nameTable, {
+        {"value", "one"},
+        {"value1", 1},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    // Row 2.
+    write(MakeRow(nameTable, {
+        {"value", "two"},
+        {"value1", 2},
+        {TString(TableIndexColumnName), 0},
+    }).Get());
+    writer->Close()
+        .Get()
+        .ThrowOnError();
+
+    TStringInput resultInput(resultStream.Str());
+    TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
+
+    // row 0
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+    ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
+
+    // row 1
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
+    ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
+
+    // row 2: "value" changed from "one" to "two", so the key switch flag is set.
+    ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
+    ASSERT_EQ(checkedSkiffParser.ParseString32(), "two");
+    ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
+    ASSERT_EQ(checkedSkiffParser.ParseInt64(), 2);
+
+    // end
+    ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
+    checkedSkiffParser.ValidateFinished();
+
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Parses a single row that contains one value of each simple wire type followed by
+// five optional columns, all written with variant tag 0, and checks the collected values.
+TEST(TSkiffParser, Simple)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
+ CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
+ CreateSimpleTypeSchema(EWireType::Double)->SetName("double"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("boolean"),
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
+ CreateSimpleTypeSchema(EWireType::Nothing)->SetName("null"),
+
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ })->SetName("opt_int64"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Uint64),
+ })->SetName("opt_uint64"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Double),
+ })->SetName("opt_double"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ })->SetName("opt_boolean"),
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::String32),
+ })->SetName("opt_string32"),
+ });
+
+ TCollectingValueConsumer collectedRows;
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Required columns, in schema order; the "null" column (Nothing) writes no bytes.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteInt64(-1);
+ checkedSkiffWriter.WriteUint64(2);
+ checkedSkiffWriter.WriteDouble(3.0);
+ checkedSkiffWriter.WriteBoolean(true);
+ checkedSkiffWriter.WriteString32("foo");
+
+ // Five optional columns, each with tag 0 (the Nothing alternative).
+ checkedSkiffWriter.WriteVariant8Tag(0);
+ checkedSkiffWriter.WriteVariant8Tag(0);
+ checkedSkiffWriter.WriteVariant8Tag(0);
+ checkedSkiffWriter.WriteVariant8Tag(0);
+ checkedSkiffWriter.WriteVariant8Tag(0);
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
+
+ ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "int64")), -1);
+ ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "uint64")), 2u);
+ ASSERT_EQ(GetDouble(collectedRows.GetRowValue(0, "double")), 3.0);
+ ASSERT_EQ(GetBoolean(collectedRows.GetRowValue(0, "boolean")), true);
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(0, "string32")), "foo");
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "null")), true);
+
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_int64")), true);
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_uint64")), true);
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_double")), true);
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_boolean")), true);
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_string32")), true);
+}
+
+// Checks parsing of a column whose Skiff schema is Variant8<Nothing, Nothing>:
+// without a table schema the parser cannot be created, while with a table schema
+// that declares the column as Optional(Null) the data is parsed.
+TEST(TSkiffParser, TestOptionalNull)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateVariant8Schema({
+            CreateSimpleTypeSchema(EWireType::Nothing),
+            CreateSimpleTypeSchema(EWireType::Nothing),
+        })->SetName("opt_null"),
+    });
+
+    {
+        // The consumer here is default-constructed (no table schema is passed).
+        TCollectingValueConsumer collectedRows;
+        EXPECT_THROW_WITH_SUBSTRING(
+            CreateParserForSkiff(skiffSchema, &collectedRows),
+            "cannot be represented with Skiff schema");
+    }
+
+    auto tableSchema = New<TTableSchema>(std::vector{
+        TColumnSchema("opt_null", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Null))),
+    });
+
+    TCollectingValueConsumer collectedRows(tableSchema);
+    auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+    TStringStream dataStream;
+    TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+    // Row 0: variant tag 0.
+    checkedSkiffWriter.WriteVariant16Tag(0);
+    checkedSkiffWriter.WriteVariant8Tag(0);
+
+    // Row 1: variant tag 1.
+    checkedSkiffWriter.WriteVariant16Tag(0);
+    checkedSkiffWriter.WriteVariant8Tag(1);
+
+    checkedSkiffWriter.Finish();
+
+    parser->Read(dataStream.Str());
+    parser->Finish();
+
+    ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+
+    // Only the value of row 0 is verified.
+    ASSERT_EQ(collectedRows.GetRowValue(0, "opt_null").Type, EValueType::Null);
+}
+
+// Parses rows whose columns are all stored in the "$sparse_columns" repeated variant
+// and checks that only the columns actually written for a row are present in it.
+TEST(TSkiffParser, TestSparse)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateRepeatedVariant16Schema({
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
+ CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
+ })->SetName("$sparse_columns"),
+ });
+
+ TCollectingValueConsumer collectedRows;
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0: "int64" and "uint64" are present.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ // sparse fields begin
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteInt64(-42);
+ checkedSkiffWriter.WriteVariant16Tag(1);
+ checkedSkiffWriter.WriteUint64(54);
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ // Row 1: only "string32" is present.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ // sparse fields begin
+ checkedSkiffWriter.WriteVariant16Tag(2);
+ checkedSkiffWriter.WriteString32("foo");
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+
+ ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "int64")), -42);
+ ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "uint64")), 54u);
+ ASSERT_FALSE(collectedRows.FindRowValue(0, "string32"));
+
+ ASSERT_FALSE(collectedRows.FindRowValue(1, "int64"));
+ ASSERT_FALSE(collectedRows.FindRowValue(1, "uint64"));
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "string32")), "foo");
+}
+
+// Parses a Yson32 column holding values of different YSON kinds (int, uint, string,
+// boolean, map, entity) and checks the value collected for each row.
+TEST(TSkiffParser, TestYsonWireType)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
+ });
+
+ TCollectingValueConsumer collectedRows;
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("-42");
+
+ // Row 1.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("42u");
+
+ // Row 2.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("\"foobar\"");
+
+ // Row 3.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("%true");
+
+ // Row 4.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("{foo=bar}");
+
+ // Row 5.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteYson32("#");
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ // Scalars are read back with typed getters, the map via GetAny, and "#" as null.
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 6);
+ ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "yson")), -42);
+ ASSERT_EQ(GetUint64(collectedRows.GetRowValue(1, "yson")), 42u);
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(2, "yson")), "foobar");
+ ASSERT_EQ(GetBoolean(collectedRows.GetRowValue(3, "yson")), true);
+ ASSERT_EQ(GetAny(collectedRows.GetRowValue(4, "yson"))->AsMap()->GetChildOrThrow("foo")->AsString()->GetValue(), "bar");
+ ASSERT_EQ(IsNull(collectedRows.GetRowValue(5, "yson")), true);
+}
+
+// Checks that invalid payloads of a Yson32 column make the Skiff parser throw
+// with the expected error message.
+TEST(TSkiffParser, TestBadYsonWireType)
+{
+    auto skiffSchema = CreateTupleSchema({
+        CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
+    });
+
+    // Serializes `ysonValue` as the only column of a single row and feeds the
+    // resulting bytes to a freshly created parser. Writing must not throw;
+    // any exception is expected to come from the parser.
+    auto parseYsonUsingSkiff = [&] (TStringBuf ysonValue) {
+        TCollectingValueConsumer collectedRows;
+        auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+        TStringStream dataStream;
+        ASSERT_NO_THROW({
+            TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+            checkedSkiffWriter.WriteVariant16Tag(0);
+            checkedSkiffWriter.WriteYson32(ysonValue);
+
+            checkedSkiffWriter.Finish();
+        });
+
+        parser->Read(dataStream.Str());
+        parser->Finish();
+    };
+
+    // Unterminated list.
+    try {
+        parseYsonUsingSkiff("[42");
+        // Without this the test would silently pass if nothing is thrown.
+        GTEST_FAIL() << "exception expected";
+    } catch (const std::exception& e) {
+        EXPECT_THAT(e.what(), testing::HasSubstr("Premature end of stream"));
+    }
+
+    // Value with top-level attributes.
+    try {
+        parseYsonUsingSkiff("<foo=bar>42");
+        // Without this the test would silently pass if nothing is thrown.
+        GTEST_FAIL() << "exception expected";
+    } catch (const std::exception& e) {
+        EXPECT_THAT(e.what(), testing::HasSubstr("Table values cannot have top-level attributes"));
+    }
+}
+
+// Tries to create a parser for schemas containing "$"-prefixed Boolean columns and,
+// if creation throws, checks the error message.
+//
+// NOTE(review): the test cannot fail when no exception is thrown, and the expected
+// substring mentions "$key_switch" for all three schemas although only the first one
+// contains that column. Confirm the intended parser behavior for "$row_switch" and
+// "$range_switch" before tightening this test.
+TEST(TSkiffParser, TestSpecialColumns)
+{
+    std::shared_ptr<TSkiffSchema> skiffSchemaList[] = {
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
+            CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
+        }),
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
+            CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$row_switch"),
+        }),
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
+            CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$range_switch"),
+        }),
+    };
+
+    for (const auto& skiffSchema : skiffSchemaList) {
+        try {
+            TCollectingValueConsumer collectedRows;
+            auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+        } catch (const std::exception& e) {
+            // Caught by const reference, as in the other tests of this file.
+            EXPECT_THAT(e.what(), testing::HasSubstr("Skiff parser does not support \"$key_switch\""));
+        }
+    }
+}
+
+// Parses rows with a regular "name" column and a "$other_columns" Yson32 map and
+// checks that the map entries are collected as separate columns of the row.
+TEST(TSkiffParser, TestOtherColumns)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+ CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
+ });
+
+ TCollectingValueConsumer collectedRows;
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteString32("row_0");
+ checkedSkiffWriter.WriteYson32("{foo=-42;}");
+
+ // Row 1.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteString32("row_1");
+ checkedSkiffWriter.WriteYson32("{bar=qux;baz={boolean=%false;};}");
+
+ // No more rows.
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(0, "name")), "row_0");
+ ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "foo")), -42);
+
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "name")), "row_1");
+ ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "bar")), "qux");
+ // The nested map is compared through its stable text YSON representation.
+ ASSERT_EQ(ConvertToYsonTextStringStable(GetAny(collectedRows.GetRowValue(1, "baz"))), "{\"boolean\"=%false;}");
+}
+
+// Parses a column described by a Skiff tuple and a struct logical type and checks
+// that it is collected as a composite value (a YSON list of the field values).
+TEST(TSkiffParser, TestComplexColumn)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
+ })->SetName("column")
+ });
+
+ // The consumer is given the table schema that declares "column" as a struct.
+ TCollectingValueConsumer collectedRows(
+ New<TTableSchema>(std::vector{
+ TColumnSchema("column", NTableClient::StructLogicalType({
+ {"key", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"value", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::Int64)}
+ }))
+ }));
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteString32("row_0");
+ checkedSkiffWriter.WriteInt64(42);
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
+ ASSERT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "column"))), "[\"row_0\";42;]");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Checks that a parser fed with no data at all produces no rows. Three variants are
+// exercised: no Read() call, a single empty Read(), and two empty Read() calls.
+TEST(TSkiffParser, TestEmptyInput)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("column"),
+ });
+
+ // The same consumer is shared by all three parsers below.
+ TCollectingValueConsumer collectedRows;
+
+ {
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+ parser->Finish();
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
+ }
+ {
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+ parser->Read("");
+ parser->Finish();
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
+ }
+ {
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+ parser->Read("");
+ parser->Read("");
+ parser->Finish();
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Checks that values are attributed to the right columns when a column name is
+// registered in the consumer's name table before the parser is created.
+TEST(TSkiffParser, ColumnIds)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("field_a"),
+ CreateSimpleTypeSchema(EWireType::Uint64)->SetName("field_b")
+ });
+
+ TCollectingValueConsumer collectedRows;
+ // "field_b" is registered first although it is the second field of the Skiff schema;
+ // presumably this makes name table ids differ from schema positions — TODO confirm.
+ collectedRows.GetNameTable()->GetIdOrRegisterName("field_b");
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteInt64(-1);
+ checkedSkiffWriter.WriteUint64(2);
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
+
+ ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "field_a")), -1);
+ ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "field_b")), 2u);
+}
+
+// Parses a struct-typed column stored inside "$sparse_columns": one row where the
+// column is present and one row where the sparse sequence is empty.
+TEST(TSkiffParser, TestSparseComplexType)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateRepeatedVariant16Schema({
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
+ })->SetName("value"),
+ })->SetName("$sparse_columns"),
+ });
+
+ // In the table schema the column is Optional(Struct), while the Skiff schema
+ // above describes it as a plain tuple.
+ TCollectingValueConsumer collectedRows(
+ New<TTableSchema>(std::vector{
+ TColumnSchema("value", OptionalLogicalType(
+ StructLogicalType({
+ {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"value", SimpleLogicalType(ESimpleLogicalValueType::Int64)}
+ })))
+ }));
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteString32("row_0");
+ checkedSkiffWriter.WriteInt64(10);
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ // Row 1.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+ EXPECT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "value"))), "[\"row_0\";10;]");
+ EXPECT_FALSE(collectedRows.FindRowValue(1, "value"));
+}
+
+// Same scenario as a sparse struct column, but here the Skiff schema itself wraps the
+// tuple into a Variant8<Nothing, Tuple>, so the value is preceded by a variant tag.
+TEST(TSkiffParser, TestSparseComplexTypeWithExtraOptional)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateRepeatedVariant16Schema({
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
+ })
+ })->SetName("column"),
+ })->SetName("$sparse_columns"),
+ });
+
+ TCollectingValueConsumer collectedRows(
+ New<TTableSchema>(std::vector{
+ TColumnSchema("column", OptionalLogicalType(
+ StructLogicalType({
+ {"key", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"value", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::Int64)}
+ })))
+ }));
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ TStringStream dataStream;
+ TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
+
+ // Row 0: sparse column 0, variant tag 1 (the tuple alternative), then the tuple fields.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteVariant8Tag(1);
+ checkedSkiffWriter.WriteString32("row_0");
+ checkedSkiffWriter.WriteInt64(42);
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ // Row 1: empty sparse sequence.
+ checkedSkiffWriter.WriteVariant16Tag(0);
+ checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
+
+ checkedSkiffWriter.Finish();
+
+ parser->Read(dataStream.Str());
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+ ASSERT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "column"))), "[\"row_0\";42;]");
+ ASSERT_FALSE(collectedRows.FindRowValue(1, "column"));
+}
+
+
+// Checks that parser creation fails for a column whose Skiff schema is a Variant8
+// with a single child that is itself Variant8<Nothing, Yson32>, when the consumer
+// has no table schema describing the column.
+TEST(TSkiffParser, TestBadWireTypeForSimpleColumn)
+{
+ auto skiffSchema = CreateTupleSchema({
+ CreateVariant8Schema({
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Yson32),
+ })
+ })->SetName("opt_yson32"),
+ });
+
+ TCollectingValueConsumer collectedRows;
+ EXPECT_THROW_WITH_SUBSTRING(
+ CreateParserForSkiff(skiffSchema, &collectedRows),
+ "cannot be represented with Skiff schema");
+}
+
+// Checks that rows are still produced when the Skiff tuple schema has no columns.
+TEST(TSkiffParser, TestEmptyColumns)
+{
+ auto skiffSchema = CreateTupleSchema({});
+ TCollectingValueConsumer collectedRows;
+ auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
+
+ // Four zero bytes; presumably two rows, each consisting only of a 16-bit tag 0
+ // (matches the row count asserted below).
+ parser->Read(TStringBuf("\x00\x00\x00\x00"sv));
+ parser->Finish();
+
+ ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
+}
+
+// Round-trips a Timestamp through wire type Uint64 and an Interval through Int64.
+// The expected hex string is the value 42 as 8 little-endian bytes.
+// CHECK_BIDIRECTIONAL_CONVERSION is defined elsewhere in this file.
+TEST(TSkiffFormat, TestTimestamp)
+{
+ using namespace NLogicalTypeShortcuts;
+ CHECK_BIDIRECTIONAL_CONVERSION(Timestamp(), CreateSimpleTypeSchema(EWireType::Uint64), 42ull, "2A000000" "00000000");
+ CHECK_BIDIRECTIONAL_CONVERSION(Interval(), CreateSimpleTypeSchema(EWireType::Int64), 42, "2A000000" "00000000");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/yt/yt/library/formats/unittests/skiff_yson_converter_ut.cpp b/yt/yt/library/formats/unittests/skiff_yson_converter_ut.cpp
new file mode 100644
index 0000000000..67e526a9dc
--- /dev/null
+++ b/yt/yt/library/formats/unittests/skiff_yson_converter_ut.cpp
@@ -0,0 +1,707 @@
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
+
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/client/table_client/logical_type.h>
+#include <yt/yt/library/formats/skiff_yson_converter.h>
+
+#include <yt/yt/core/yson/parser.h>
+#include <yt/yt/core/yson/pull_parser.h>
+#include <yt/yt/core/yson/token_writer.h>
+#include <yt/yt/core/yson/writer.h>
+
+#include <library/cpp/skiff/skiff.h>
+#include <library/cpp/skiff/skiff_schema.h>
+
+#include <util/string/hex.h>
+
+#include <util/stream/mem.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NTableClient;
+using namespace NSkiff;
+using namespace NYson;
+using namespace NTableClient::NLogicalTypeShortcuts;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Wraps `skiffSchema` into a two-alternative Variant8 schema whose first alternative
+// is Nothing and whose second alternative is the given schema.
+std::shared_ptr<TSkiffSchema> SkiffOptional(std::shared_ptr<TSkiffSchema> skiffSchema)
+{
+    auto nothingSchema = CreateSimpleTypeSchema(EWireType::Nothing);
+    return CreateVariant8Schema({std::move(nothingSchema), std::move(skiffSchema)});
+}
+
+// Converts a single YSON node to Skiff with the converter built for the given logical
+// type, Skiff schema and config, and returns the produced bytes as a lower-case hex string.
+// Also expects the YSON cursor to be at end of stream after the conversion.
+TString ConvertYsonHex(
+    const TLogicalTypePtr& logicalType,
+    const std::shared_ptr<TSkiffSchema>& skiffSchema,
+    TStringBuf ysonString,
+    const TYsonToSkiffConverterConfig& config = {})
+{
+    auto ysonToSkiff = CreateYsonToSkiffConverter(
+        TComplexTypeFieldDescriptor("test-field", logicalType),
+        skiffSchema,
+        config);
+
+    // Yson parsers have a bug when they can't parse some values that end unexpectedly,
+    // so a trailing space is appended to the input.
+    TString paddedYson = TString{ysonString} + " ";
+
+    TStringStream skiffBytes;
+    {
+        TCheckedInDebugSkiffWriter skiffWriter(skiffSchema, &skiffBytes);
+
+        TMemoryInput ysonInput(paddedYson);
+        TYsonPullParser ysonParser(&ysonInput, EYsonType::Node);
+        TYsonPullParserCursor ysonCursor(&ysonParser);
+
+        ysonToSkiff(&ysonCursor, &skiffWriter);
+
+        EXPECT_EQ(ysonCursor.GetCurrent().GetType(), EYsonItemType::EndOfStream);
+        skiffWriter.Finish();
+    }
+
+    auto hex = HexEncode(skiffBytes.Str());
+    hex.to_lower();
+    return hex;
+}
+
+// Decodes `hexString` into Skiff bytes, converts them to YSON with the converter built
+// for the given logical type, Skiff schema and config, and returns the result as text YSON.
+// Also expects the Skiff parser to have read exactly the decoded input.
+TString ConvertHexToTextYson(
+    const TLogicalTypePtr& logicalType,
+    const std::shared_ptr<TSkiffSchema>& skiffSchema,
+    TStringBuf hexString,
+    const TSkiffToYsonConverterConfig& config = {})
+{
+    auto skiffToYson = CreateSkiffToYsonConverter(
+        TComplexTypeFieldDescriptor("test-field", logicalType),
+        skiffSchema,
+        config);
+
+    // Step 1: Skiff -> YSON as emitted by the token writer.
+    TStringStream rawYson;
+    {
+        TString skiffBytes = HexDecode(hexString);
+        TMemoryInput skiffInput(skiffBytes);
+        TCheckedInDebugSkiffParser skiffParser(skiffSchema, &skiffInput);
+
+        TCheckedInDebugYsonTokenWriter tokenWriter(&rawYson);
+        skiffToYson(&skiffParser, &tokenWriter);
+        EXPECT_EQ(skiffParser.GetReadBytesCount(), skiffBytes.size());
+    }
+    rawYson.Finish();
+
+    // Step 2: re-serialize the produced YSON in text format.
+    TStringStream textYson;
+    {
+        TYsonWriter textWriter(&textYson, EYsonFormat::Text);
+        ParseYsonStringBuffer(rawYson.Str(), EYsonType::Node, &textWriter);
+    }
+    textYson.Finish();
+
+    return textYson.Str();
+}
+
+
+// Checks both conversion directions for one (logical type, Skiff schema) pair:
+//   - ysonString converted with ConvertYsonHex must equal skiffString (lower-case hex);
+//   - skiffString converted with ConvertHexToTextYson must equal ysonString.
+// Optional trailing arguments initialize the tuple
+// (TYsonToSkiffConverterConfig, TSkiffToYsonConverterConfig) used for the two directions.
+// NOTE: macro arguments are expanded more than once, so they should be free of side effects.
+#define CHECK_BIDIRECTIONAL_CONVERSION(logicalType, skiffSchema, ysonString, skiffString, ...) \
+ do { \
+ std::tuple<TYsonToSkiffConverterConfig, TSkiffToYsonConverterConfig> cfg = {__VA_ARGS__}; \
+ auto actualSkiffString = ConvertYsonHex(logicalType, skiffSchema, ysonString, std::get<0>(cfg)); \
+ EXPECT_EQ(actualSkiffString, skiffString) << "Yson -> Skiff conversion error"; \
+ auto actualYsonString = ConvertHexToTextYson(logicalType, skiffSchema, skiffString, std::get<1>(cfg)); \
+ EXPECT_EQ(actualYsonString, ysonString) << "Skiff -> Yson conversion error"; \
+ } while (0)
+
+
+// Round-trips values of simple logical types through their Skiff wire types.
+// Expected Skiff strings are lower-case hex of the serialized bytes.
+TEST(TYsonSkiffConverterTest, TestSimpleTypes)
+{
+ // Int8 is carried by wire type Int64.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Int8(),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ "-42",
+ "d6ffffff" "ffffffff");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Uint64(),
+ CreateSimpleTypeSchema(EWireType::Uint64),
+ "42u",
+ "2a000000" "00000000");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Uint64(),
+ CreateSimpleTypeSchema(EWireType::Uint64),
+ "8u",
+ "08000000" "00000000");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Bool(),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ "%true",
+ "01");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Double(),
+ CreateSimpleTypeSchema(EWireType::Double),
+ "0.",
+ "00000000" "00000000");
+
+ // Float is carried by wire type Double as well.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Float(),
+ CreateSimpleTypeSchema(EWireType::Double),
+ "0.",
+ "00000000" "00000000");
+
+ // String32: 4-byte length followed by the bytes of "foo".
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ String(),
+ CreateSimpleTypeSchema(EWireType::String32),
+ "\"foo\"",
+ "03000000" "666f6f");
+
+ // Null with wire type Nothing produces no bytes.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Null(),
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ "#",
+ "");
+
+ // Uuid as Uint128: the expected bytes are the YSON string bytes in reverse order.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Uuid(),
+ CreateSimpleTypeSchema(EWireType::Uint128),
+ "\"\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF\"",
+ "fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0");
+
+ // Uuid as String32: 4-byte length (16) followed by the bytes in their original order.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Uuid(),
+ CreateSimpleTypeSchema(EWireType::String32),
+ "\"\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF\"",
+ "10000000f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff");
+}
+
+// Round-trips values of logical type Yson through wire type Yson32. The expected Skiff
+// string is a 4-byte length followed by the serialized YSON payload.
+TEST(TYsonSkiffConverterTest, TestYson32)
+{
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Yson(),
+ CreateSimpleTypeSchema(EWireType::Yson32),
+ "-42",
+ "02000000" "0253");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Yson(),
+ CreateSimpleTypeSchema(EWireType::Yson32),
+ "#",
+ "01000000" "23");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Yson(),
+ CreateSimpleTypeSchema(EWireType::Yson32),
+ "[1;2;[3;];]",
+ "0e000000" "5b02023b02043b5b02063b5d3b5d");
+}
+
+// Covers optional logical types: single and nested optionals, optionals of lists,
+// mismatching optional nesting, and the AllowOmitTopLevelOptional config flag.
+TEST(TYsonSkiffConverterTest, TestOptionalTypes)
+{
+ // Single optional: tag 01 followed by the value, or tag 00 for "#".
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Int64()),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ "-42",
+ "01" "d6ffffff" "ffffffff");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Int64()),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ "#",
+ "00");
+
+ // Nested optional: in YSON the inner optional is wrapped into a one-element list.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(Bool())),
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
+ "[%true;]",
+ "01" "01" "01");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(Bool())),
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
+ "[#;]",
+ "01" "00");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(Bool())),
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
+ "#",
+ "00");
+
+ // Optionals of lists.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(List(Bool())),
+ SkiffOptional(CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)})),
+ "#",
+ "00");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(List(Bool()))),
+ SkiffOptional(
+ SkiffOptional(
+ CreateRepeatedVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Boolean)
+ }))),
+ "[[%true;%false;%true;];]",
+ "01" "01" "0001" "0000" "0001" "ff");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(List(Bool()))),
+ SkiffOptional(
+ SkiffOptional(
+ CreateRepeatedVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Boolean)
+ }))),
+ "[#;]",
+ "0100");
+
+ // With the default config the optional nesting of the logical type and of the
+ // Skiff schema must match in both directions.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertYsonHex(
+ Optional(Optional(Bool())),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
+ " [ %true ] "),
+ "Optional nesting mismatch");
+
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertHexToTextYson(
+ Optional(Bool()),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ "00"),
+ "Optional nesting mismatch");
+
+ // With AllowOmitTopLevelOptional set, a Skiff schema with one optional level fewer
+ // than the logical type is accepted.
+ TYsonToSkiffConverterConfig ysonToSkiffConfig;
+ ysonToSkiffConfig.AllowOmitTopLevelOptional = true;
+
+ TSkiffToYsonConverterConfig skiffToYsonConfig;
+ skiffToYsonConfig.AllowOmitTopLevelOptional = true;
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(Bool())),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
+ "[%true;]",
+ "01" "01",
+ ysonToSkiffConfig,
+ skiffToYsonConfig);
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Optional(Optional(Bool())),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
+ "[#;]",
+ "00",
+ ysonToSkiffConfig,
+ skiffToYsonConfig);
+
+ // A top-level "#" cannot be represented once the top-level optional is omitted.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertYsonHex(
+ Optional(Optional(Bool())),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
+ " # ",
+ ysonToSkiffConfig),
+ "value expected to be nonempty");
+}
+
+// Round-trips lists through RepeatedVariant8 schemas. In the expected hex every
+// element is preceded by tag 00 and the sequence is terminated by ff.
+TEST(TYsonSkiffConverterTest, TestListTypes)
+{
+ // Empty list: only the terminator.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ List(Bool()),
+ CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)}),
+ "[]",
+ "ff");
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ List(Bool()),
+ CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)}),
+ "[%true;%true;%true;]",
+ "00" "01" "00" "01" "00" "01" "ff");
+
+ // List of lists: each inner list is itself a tagged, ff-terminated sequence.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ List(List(Bool())),
+ CreateRepeatedVariant8Schema({CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)})}),
+ "[[];[%true;];[%true;%true;];]",
+ "00" "ff" "00" "0001ff" "00" "00010001ff" "ff");
+}
+
+// Round-trips a struct through a Skiff tuple with matching field names. In YSON the
+// struct is a list of its field values; in Skiff the fields are written back to back.
+TEST(TYsonSkiffConverterTest, TestStruct)
+{
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ Struct(
+ "key", String(),
+ "value", Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+ }),
+ "[\"true\";%true;]",
+ "04000000" "74727565" "01");
+}
+
+// Covers struct fields that are present in the logical type but absent from the
+// Skiff schema: they are skipped on Yson -> Skiff, and on Skiff -> Yson the
+// conversion is possible only when the missing fields are optional.
+TEST(TYsonSkiffConverterTest, TestSkippedFields)
+{
+    TString skiffString;
+    // "subkey" is missing from the Skiff schema and is dropped from the output.
+    skiffString = ConvertYsonHex(
+        Struct(
+            "key", String(),
+            "subkey", Int64(),
+            "value", Bool()),
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+            CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+        }),
+        " [ true ; 1; %true ] ");
+    EXPECT_EQ(skiffString, "04000000" "74727565" "01"sv);
+
+    // Only "subkey" is kept.
+    skiffString = ConvertYsonHex(
+        Struct(
+            "key", String(),
+            "subkey", Int64(),
+            "value", Bool()),
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
+        }),
+        " [ true ; 1; %true ] ");
+    EXPECT_EQ(skiffString, "01000000" "00000000"sv);
+
+    // Skiff -> Yson must fail: "key" and "value" are non-optional and absent from Skiff.
+    try {
+        ConvertHexToTextYson(
+            Struct(
+                "key", String(),
+                "subkey", Int64(),
+                "value", Bool()),
+            CreateTupleSchema({
+                CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
+            }),
+            "01000000" "00000000");
+        // Without this the test would silently pass if nothing is thrown.
+        GTEST_FAIL() << "exception expected";
+    } catch (const std::exception& e) {
+        EXPECT_THAT(e.what(), testing::ContainsRegex("Non optional struct field .* is missing"));
+    }
+
+    // With optional "key" and "value" the missing fields are filled with "#".
+    CHECK_BIDIRECTIONAL_CONVERSION(
+        Struct(
+            "key", Optional(String()),
+            "subkey", Int64(),
+            "value", Optional(Bool())),
+        CreateTupleSchema({
+            CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
+        }),
+        "[#;15;#;]",
+        "0f000000" "00000000");
+}
+
+// Covers Skiff tuple fields that have no counterpart in the logical struct type:
+// on Yson -> Skiff optional extra fields are written as empty (tag 00) and non-optional
+// ones are rejected; on Skiff -> Yson an extra field is rejected.
+TEST(TYsonSkiffConverterTest, TestUnknownSkiffFields)
+{
+ TString skiffString;
+ // Extra optional field "key2" in the middle of the tuple.
+ skiffString = ConvertYsonHex(
+ Struct(
+ "key", String(),
+ "subkey", Int64(),
+ "value", Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::String32))->SetName("key2"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+ }),
+ " [ true ; 1; %true ] ");
+ EXPECT_EQ(skiffString, "04000000" "74727565" "00" "01"sv);
+
+ // Extra optional field "value2" at the end of the tuple.
+ skiffString = ConvertYsonHex(
+ Struct(
+ "key", String(),
+ "subkey", Int64(),
+ "value", Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Yson32))->SetName("value2"),
+ }),
+ " [ true ; 1; %true ] ");
+ EXPECT_EQ(skiffString, "04000000" "74727565" "01" "00"sv);
+
+
+ // Extra non-optional field "value2" must be rejected.
+ try {
+ ConvertYsonHex(
+ Struct(
+ "key", String(),
+ "subkey", Int64(),
+ "value", Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+ CreateSimpleTypeSchema(EWireType::Yson32)->SetName("value2"),
+ }),
+ " [ true ; 1; %true ] ");
+ GTEST_FAIL() << "exception expected";
+ } catch (const std::exception& e) {
+ EXPECT_THAT(e.what(), testing::ContainsRegex("Non optional Skiff field .* is missing corresponding logical struct field"));
+ }
+
+ // In the Skiff -> Yson direction even an optional extra field is rejected.
+ try {
+ ConvertHexToTextYson(
+ Struct(
+ "key", String(),
+ "subkey", Int64(),
+ "value", Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::String32))->SetName("key2"),
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
+ }),
+ "04000000" "74727565" "00" "01"sv);
+ GTEST_FAIL() << "expected_exception";
+ } catch (const std::exception& e) {
+ EXPECT_THAT(e.what(), testing::ContainsRegex("is not found in logical type"));
+ }
+}
+
+// Logical tuples map positionally onto unnamed Skiff tuple children; the
+// second case carries an optional element that is present ("01" marker).
+TEST(TYsonSkiffConverterTest, TestTuple)
+{
+ {
+ const auto tupleType = Tuple(String(), Bool());
+ const auto tupleSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ });
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ tupleType,
+ tupleSchema,
+ "[\"true\";%true;]",
+ "04000000" "74727565" "01");
+ }
+
+ {
+ const auto tupleType = Tuple(Int64(), Optional(Int64()));
+ const auto tupleSchema = CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Int64),
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ });
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ tupleType,
+ tupleSchema,
+ "[2;42;]",
+ "02000000" "00000000" "01" "2a000000" "00000000");
+ }
+}
+
+// Tuple elements whose Skiff wire type is `Nothing` produce no bytes on the
+// Yson -> Skiff path; only the remaining elements are serialized.
+TEST(TYsonSkiffConverterTest, TestTupleSkippedFields)
+{
+ // The middle element is skipped.
+ const TString middleSkipped = ConvertYsonHex(
+ Tuple(String(), Int64(), Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32),
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ }),
+ " [ true ; 1; %true ] ");
+ EXPECT_EQ(middleSkipped, "04000000" "74727565" "01"sv);
+
+ // Both outer elements are skipped.
+ const TString outerSkipped = ConvertYsonHex(
+ Tuple(String(), Int64(), Bool()),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ }),
+ " [ true ; 1; %true ] ");
+ EXPECT_EQ(outerSkipped, "01000000" "00000000"sv);
+
+ // Skipped elements are optional and empty in the input.
+ const TString optionalSkipped = ConvertYsonHex(
+ Tuple(Optional(String()), Int64(), Optional(Bool())),
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ CreateSimpleTypeSchema(EWireType::Nothing)
+ }),
+ "[#;15;#;]");
+ EXPECT_EQ(optionalSkipped, "0f000000" "00000000"sv);
+}
+
+// A logical dict is encoded as a repeated_variant8 of (key, value) tuples:
+// every entry is prefixed with tag "00" and the sequence ends with "ff".
+TEST(TYsonSkiffConverterTest, TestDict)
+{
+ const auto logicalType = Dict(String(), Int64());
+ const auto skiffSchema = CreateRepeatedVariant8Schema({
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32),
+ CreateSimpleTypeSchema(EWireType::Int64)
+ })
+ });
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ skiffSchema,
+ "[[\"one\";1;];[\"two\";2;];]",
+ "00" "03000000" "6f6e65" "01000000" "00000000"
+ "00" "03000000" "74776f" "02000000" "00000000"
+ "ff");
+
+ // Entry tag "01" is not valid for a single-alternative repeated variant.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertHexToTextYson(logicalType, skiffSchema, "01" "01000000" "6f" "01000000" "00000000" "ff"),
+ "Unexpected \"repeated_variant8\" tag");
+
+ // The terminating "ff" is missing.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertHexToTextYson(logicalType, skiffSchema, "00" "01000000" "6f" "01000000" "00000000"),
+ "Premature end of stream");
+}
+
+// Tagged wrappers (both around the dict and around its key type) do not
+// change the encoding: the schema, Yson and Skiff bytes are the same as for
+// the plain Dict(String, Int64) case in TestDict.
+TEST(TYsonSkiffConverterTest, TestTagged)
+{
+ const auto logicalType = Tagged(
+ "tag",
+ Dict(Tagged("tag", String()), Int64()));
+ const auto skiffSchema = CreateRepeatedVariant8Schema({
+ CreateTupleSchema({
+ CreateSimpleTypeSchema(EWireType::String32),
+ CreateSimpleTypeSchema(EWireType::Int64)
+ })
+ });
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ skiffSchema,
+ "[[\"one\";1;];[\"two\";2;];]",
+ "00" "03000000" "6f6e65" "01000000" "00000000"
+ "00" "03000000" "74776f" "02000000" "00000000"
+ "ff");
+}
+
+// Optional(VariantTuple(Null, Int64)) is converted against a Skiff schema made
+// of nested optionals: the outer Skiff optional carries the logical optional,
+// the inner one carries the variant alternative (0 -> empty, 1 -> Int64).
+TEST(TYsonSkiffConverterTest, TestOptionalVariantSimilarity)
+{
+ auto logicalType = Optional(
+ VariantTuple(Null(), Int64()));
+
+ // Variant alternative 1 (Int64).
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
+ "[1;42;]",
+ "01" "01" "2a000000" "00000000");
+
+ // Variant alternative 0 (Null).
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
+ "[0;#;]",
+ "01" "00");
+
+ // Empty top-level optional.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
+ "#",
+ "00");
+
+ // With AllowOmitTopLevelOptional the Skiff schema below has a single
+ // optional level while the logical type keeps its top-level Optional.
+ TYsonToSkiffConverterConfig ysonToSkiffConfig;
+ ysonToSkiffConfig.AllowOmitTopLevelOptional = true;
+
+ TSkiffToYsonConverterConfig skiffToYsonConfig;
+ skiffToYsonConfig.AllowOmitTopLevelOptional = true;
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ "[1;42;]",
+ "01" "2a000000" "00000000",
+ ysonToSkiffConfig,
+ skiffToYsonConfig);
+
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ logicalType,
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ "[0;#;]",
+ "00",
+ ysonToSkiffConfig,
+ skiffToYsonConfig);
+
+ // An empty top-level value cannot be expressed once the outer optional is omitted.
+ EXPECT_THROW_WITH_SUBSTRING(
+ ConvertYsonHex(
+ logicalType,
+ SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
+ "#",
+ ysonToSkiffConfig),
+ "value expected to be nonempty");
+}
+
+// Fixture parameterized by (logical variant metatype, Skiff variant wire type).
+// The helpers build matching logical types / Skiff schemas for the current
+// parameter so that the same test body covers every combination.
+class TYsonSkiffConverterTestVariant
+ : public ::testing::TestWithParam<std::tuple<ELogicalMetatype, EWireType>>
+{
+public:
+ // Returns a variant-tuple type, or a variant-struct type whose fields are
+ // named "field0", "field1", ... depending on the metatype parameter.
+ TLogicalTypePtr VariantLogicalType(const std::vector<TLogicalTypePtr>& elements)
+ {
+ const auto metatype = std::get<0>(GetParam());
+ if (metatype == ELogicalMetatype::VariantTuple) {
+ return VariantTupleLogicalType(elements);
+ }
+
+ std::vector<TStructField> structFields;
+ size_t index = 0;
+ for (const auto& element : elements) {
+ structFields.push_back({Format("field%v", index), element});
+ ++index;
+ }
+ return VariantStructLogicalType(structFields);
+ }
+
+ // Names the children "field0", "field1", ... and wraps them into a
+ // variant8 or variant16 schema depending on the wire type parameter.
+ std::shared_ptr<TSkiffSchema> VariantSkiffSchema(std::vector<std::shared_ptr<TSkiffSchema>> elements)
+ {
+ size_t index = 0;
+ for (const auto& element : elements) {
+ element->SetName(Format("field%v", index));
+ ++index;
+ }
+
+ switch (std::get<1>(GetParam())) {
+ case EWireType::Variant8:
+ return CreateVariant8Schema(std::move(elements));
+ case EWireType::Variant16:
+ return CreateVariant16Schema(std::move(elements));
+ default:
+ Y_UNREACHABLE();
+ }
+ }
+
+ // Extra hex byte appended to a one-byte tag for Variant16; empty otherwise.
+ TString VariantTagInfix() const
+ {
+ const bool isVariant16 = std::get<1>(GetParam()) == EWireType::Variant16;
+ return isVariant16 ? TString("00") : TString();
+ }
+};
+
+// Round-trips both alternatives of a two-alternative variant. The Skiff
+// encoding is the alternative tag (one byte, plus VariantTagInfix() for
+// Variant16) followed by the payload.
+TEST_P(TYsonSkiffConverterTestVariant, TestVariant)
+{
+ // Alternative 0: Int64.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ VariantLogicalType({
+ Int64(),
+ Bool()
+ }),
+ VariantSkiffSchema({
+ CreateSimpleTypeSchema(EWireType::Int64),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ }),
+ "[0;42;]",
+ "00" + VariantTagInfix() + "2a000000" "00000000");
+
+ // Alternative 1: Bool.
+ CHECK_BIDIRECTIONAL_CONVERSION(
+ VariantLogicalType({
+ Int64(),
+ Bool()
+ }),
+ VariantSkiffSchema({
+ CreateSimpleTypeSchema(EWireType::Int64),
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ }),
+ "[1;%true;]",
+ "01" + VariantTagInfix() + "01");
+}
+
+// Malformed variant input must be rejected in both directions: an out-of-range
+// tag, an empty list and a list without a payload on the Yson side, and an
+// out-of-range tag on the Skiff side.
+TEST_P(TYsonSkiffConverterTestVariant, TestMalformedVariants)
+{
+ auto logicalType = VariantLogicalType({
+ Bool(),
+ Int64(),
+ });
+ auto skiffSchema = VariantSkiffSchema({
+ CreateSimpleTypeSchema(EWireType::Boolean),
+ CreateSimpleTypeSchema(EWireType::Int64),
+ });
+
+ EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[2; 42]"), "Yson to Skiff conversion error");
+ EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[]"), "Yson to Skiff conversion error");
+ EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[0]"), "Yson to Skiff conversion error");
+
+ EXPECT_THROW_WITH_SUBSTRING(ConvertHexToTextYson(logicalType, skiffSchema, "02" + VariantTagInfix() + "00"),
+ "Skiff to Yson conversion error");
+}
+
+// Runs every TYsonSkiffConverterTestVariant test for all four combinations of
+// {VariantStruct, VariantTuple} x {Variant8, Variant16}.
+INSTANTIATE_TEST_SUITE_P(
+ Variants,
+ TYsonSkiffConverterTestVariant,
+ ::testing::Combine(
+ ::testing::ValuesIn({ELogicalMetatype::VariantStruct, ELogicalMetatype::VariantTuple}),
+ ::testing::ValuesIn({EWireType::Variant8, EWireType::Variant16}))
+);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/value_examples.cpp b/yt/yt/library/formats/unittests/value_examples.cpp
new file mode 100644
index 0000000000..da41a6341e
--- /dev/null
+++ b/yt/yt/library/formats/unittests/value_examples.cpp
@@ -0,0 +1,163 @@
+#include "value_examples.h"
+
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
+
+#include <yt/yt/library/decimal/decimal.h>
+
+#include <cmath>
+
+namespace NYT::NTableClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NLogicalTypeShortcuts;
+using namespace NNamedValue;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Stores the three components of an example; all arguments are taken by value
+// and moved into the corresponding members.
+TValueExample::TValueExample(TLogicalTypePtr logicalType, TNamedValue::TValue value, TString prettyYson)
+ : LogicalType(std::move(logicalType))
+ , Value(std::move(value))
+ , PrettyYson(std::move(prettyYson))
+{ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Returns a copy of a fixed table of (logical type, value, text Yson) examples
+// for simple logical types.
+//
+// On every call the table is checked for completeness: if some value of
+// ESimpleLogicalValueType has no example in the table, an error is thrown.
+std::vector<TValueExample> GetPrimitiveValueExamples()
+{
+ static const std::vector<TValueExample> valueExamples = {
+ TValueExample{Int8(), 0, "0"},
+ TValueExample{Int8(), -5, "-5"},
+ TValueExample{Int8(), 42, "42"},
+ TValueExample{Int8(), -128, "-128"},
+ TValueExample{Int8(), 127, "127"},
+
+ TValueExample{Int16(), 0, "0"},
+ TValueExample{Int16(), -6, "-6"},
+ TValueExample{Int16(), 43, "43"},
+ TValueExample{Int16(), 0x7FFF, "32767"},
+ TValueExample{Int16(), -0x8000, "-32768"},
+
+ TValueExample{Int32(), 0, "0"},
+ TValueExample{Int32(), -7, "-7"},
+ TValueExample{Int32(), 44, "44"},
+ TValueExample{Int32(), 0x7FFFFFFF, "2147483647"},
+ TValueExample{Int32(), -0x80000000ll, "-2147483648"},
+
+ TValueExample{Int64(), 0, "0"},
+ TValueExample{Int64(), -7, "-7"},
+ TValueExample{Int64(), 45, "45"},
+ TValueExample{Int64(), 0x7FFFFFFFFFFFFFFFll, "9223372036854775807"},
+ TValueExample{Int64(), i64(-0x8000000000000000ll), "-9223372036854775808"},
+
+ TValueExample{Uint8(), 0ull, "0u"},
+ TValueExample{Uint8(), 46ull, "46u"},
+ TValueExample{Uint8(), 255ull, "255u"},
+
+ TValueExample{Uint16(), 0ull, "0u"},
+ TValueExample{Uint16(), 47ull, "47u"},
+ TValueExample{Uint16(), 0xFFFFull, "65535u"},
+
+ TValueExample{Uint32(), 0ull, "0u"},
+ TValueExample{Uint32(), 48ull, "48u"},
+ TValueExample{Uint32(), 0xFFFFFFFFull, "4294967295u"},
+
+ TValueExample{Uint64(), 0ull, "0u"},
+ TValueExample{Uint64(), 49ull, "49u"},
+ TValueExample{Uint64(), 0xFFFFFFFFFFFFFFFFull, "18446744073709551615u"},
+
+ TValueExample{String(), "", R"("")"},
+ TValueExample{String(), "foo", R"("foo")"},
+ TValueExample{String(), TString(TStringBuf("\xf0\x00"sv)), R"("\xf0\x00")"},
+
+ TValueExample{Utf8(), "", R"("")"},
+ TValueExample{Utf8(), "bar", R"("bar")"},
+
+ TValueExample{Bool(), true, "%true"},
+ TValueExample{Bool(), false, "%false"},
+
+ // NB. .125 = 1 / 8 is exactly representable in binary floating point.
+ TValueExample{Double(), 3.125, "3.125"},
+ TValueExample{Double(), 2.775, "2.775"},
+ // TPrimitiveTypeExample{Double(), std::nan("1"), "%nan"},
+ TValueExample{Double(), INFINITY, "%inf"},
+ TValueExample{Double(), -INFINITY, "%-inf"},
+
+ TValueExample{Float(), 5.125, "5.125"},
+ TValueExample{Float(), 6.775, "6.775"},
+
+ TValueExample{Null(), nullptr, "#"},
+ TValueExample{Void(), nullptr, "#"},
+
+ TValueExample{Json(), "83", R"("83")"},
+ TValueExample{Json(), "[]", R"("[]")"},
+
+ TValueExample{
+ Uuid(),
+ TString(16, 0),
+ TString(TStringBuf(R"("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")"))
+ },
+ TValueExample{
+ Uuid(),
+ TString(TStringBuf("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10"sv)),
+ TString(TStringBuf(R"("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10")"))
+ },
+
+ TValueExample{Date(), 0ull, "0u"},
+ TValueExample{Date(), 18431ull, "18431u"},
+ TValueExample{Date(), 49672ull, "49672u"},
+
+ TValueExample{Datetime(), 0ull, "0u"},
+ TValueExample{Datetime(), 668800588ull, "668800588u"},
+ TValueExample{Datetime(), 4291747199ull, "4291747199u"},
+
+ TValueExample{Timestamp(), 0ull, "0u"},
+ TValueExample{Timestamp(), 2508452463052426ull, "2508452463052426u"},
+ TValueExample{Timestamp(), 4291747199999999ull, "4291747199999999u"},
+
+ // Interval is a signed type: the examples below are signed (one is
+ // negative) and their Yson has no `u` suffix, unlike Timestamp above.
+ TValueExample{Interval(), 0, "0"},
+ TValueExample{Interval(), 2208610308646589ll, "2208610308646589"},
+ TValueExample{Interval(), 1187314596653899ll, "1187314596653899"},
+ TValueExample{Interval(), 4291747199999999ll, "4291747199999999"},
+ TValueExample{Interval(), -4291747199999999ll, "-4291747199999999"},
+
+ TValueExample{Date32(), -53375809, "-53375809"},
+ TValueExample{Date32(), 0, "0"},
+ TValueExample{Date32(), 53375807, "53375807"},
+
+ TValueExample{Datetime64(), -4611669897600ll, "-4611669897600"},
+ TValueExample{Datetime64(), 42, "42"},
+ TValueExample{Datetime64(), 4611669811199ll, "4611669811199"},
+
+ TValueExample{Timestamp64(), -4611669897600000000ll, "-4611669897600000000"},
+ TValueExample{Timestamp64(), 42, "42"},
+ TValueExample{Timestamp64(), 4611669811199999999ll, "4611669811199999999"},
+
+ TValueExample{Interval64(), -9223339708799999999ll, "-9223339708799999999"},
+ TValueExample{Interval64(), 0, "0"},
+ TValueExample{Interval64(), 9223339708799999999ll, "9223339708799999999"},
+
+ TValueExample{Yson(), "qux", R"("qux")"},
+
+ TValueExample{Decimal(3, 2), NDecimal::TDecimal::TextToBinary("3.14", 3, 2), R"("\x80\x00\x01\x3a")"},
+ };
+
+ // Completeness check: start from the full enum domain and remove every
+ // simple type that has at least one example; anything left is uncovered.
+ THashSet<ESimpleLogicalValueType> allValueTypes;
+ for (const auto value : TEnumTraits<ESimpleLogicalValueType>::GetDomainValues()) {
+ allValueTypes.insert(value);
+ }
+ for (const auto& example : valueExamples) {
+ if (example.LogicalType->GetMetatype() == ELogicalMetatype::Simple) {
+ allValueTypes.erase(example.LogicalType->AsSimpleTypeRef().GetElement());
+ }
+ }
+ if (!allValueTypes.empty()) {
+ THROW_ERROR_EXCEPTION("PrimitiveTypeExample variable doesn't contain values: %v",
+ allValueTypes);
+ }
+ return valueExamples;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NTableClient
diff --git a/yt/yt/library/formats/unittests/value_examples.h b/yt/yt/library/formats/unittests/value_examples.h
new file mode 100644
index 0000000000..06644e2cd6
--- /dev/null
+++ b/yt/yt/library/formats/unittests/value_examples.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <yt/yt/library/named_value/named_value.h>
+
+#include <yt/yt/client/table_client/logical_type.h>
+
+namespace NYT::NTableClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// One example value of a logical type together with its textual Yson form.
+struct TValueExample
+{
+ // Logical type the example belongs to.
+ TLogicalTypePtr LogicalType;
+ // The value itself, in the representation used by NNamedValue.
+ NNamedValue::TNamedValue::TValue Value;
+ // Text Yson rendering of the value.
+ TString PrettyYson;
+
+ TValueExample(TLogicalTypePtr logicalType, NNamedValue::TNamedValue::TValue value, TString prettyYson);
+};
+
+// Returns example values for simple logical types. The definition (in
+// value_examples.cpp) throws if some ESimpleLogicalValueType has no example.
+std::vector<TValueExample> GetPrimitiveValueExamples();
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NTableClient
diff --git a/yt/yt/library/formats/unittests/web_json_writer_ut.cpp b/yt/yt/library/formats/unittests/web_json_writer_ut.cpp
new file mode 100644
index 0000000000..d7f20ec20a
--- /dev/null
+++ b/yt/yt/library/formats/unittests/web_json_writer_ut.cpp
@@ -0,0 +1,1714 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/library/formats/web_json_writer.h>
+
+#include <yt/yt/client/table_client/logical_type.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/schema.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+
+#include <yt/yt/core/json/json_parser.h>
+
+#include <yt/yt/core/ytree/fluent.h>
+
+#include <yt/yt/library/named_value/named_value.h>
+
+#include <limits>
+
+namespace NYT::NFormats {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYTree;
+using namespace NYson;
+using namespace NConcurrency;
+using namespace NTableClient;
+
+using NNamedValue::MakeRow;
+
+// Parses a JSON document into a YTree node using the "plain" JSON mode.
+INodePtr ParseJsonToNode(TStringBuf string)
+{
+ // Plain (raw) JSON parsing: both the "smart" attribute analysis and the
+ // UTF-8 decoding have to be switched off.
+ auto jsonConfig = New<NJson::TJsonFormatConfig>();
+ jsonConfig->EncodeUtf8 = false;
+ jsonConfig->Plain = true;
+
+ TMemoryInput input(string);
+ TBuildingYsonConsumerViaTreeBuilder<INodePtr> treeBuilder(EYsonType::Node);
+ NJson::ParseJson(&input, &treeBuilder, std::move(jsonConfig));
+ return treeBuilder.Finish();
+}
+
+// Fixture for web_json writer tests: owns the name table, the format config
+// and an in-memory output stream, and creates the writer on demand so that
+// each test can tweak Config_ first.
+class TWriterForWebJson
+ : public ::testing::Test
+{
+protected:
+ TNameTablePtr NameTable_ = New<TNameTable>();
+ TWebJsonFormatConfigPtr Config_ = New<TWebJsonFormatConfig>();
+ // Receives everything the writer produces; tests compare against Str().
+ TStringStream OutputStream_;
+ ISchemalessFormatWriterPtr Writer_;
+
+ const TString ValueColumnName_ = "value";
+
+ // Creates Writer_ from the current Config_ and NameTable_, writing into
+ // OutputStream_. By default a single empty table schema is used.
+ void CreateStandardWriter(const std::vector<TTableSchemaPtr>& schemas = {New<TTableSchema>()})
+ {
+ Writer_ = CreateWriterForWebJson(
+ Config_,
+ NameTable_,
+ schemas,
+ CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)));
+ }
+};
+
+// Basic output shape. MaxAllColumnNamesCount = 2 truncates "all_column_names"
+// to two entries and sets "incomplete_all_column_names" to "true". The row
+// index column passed in the rows does not appear in the expected output.
+TEST_F(TWriterForWebJson, Simple)
+{
+ Config_->MaxAllColumnNamesCount = 2;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", 100500u},
+ {"column_b", true},
+ {"column_c", "row1_c"},
+ {TString(RowIndexColumnName), 0},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row2_c"},
+ {"column_b", "row2_b"},
+ {TString(RowIndexColumnName), 1},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"column_a\":{"
+ "\"$type\":\"uint64\","
+ "\"$value\":\"100500\""
+ "},"
+ "\"column_b\":{"
+ "\"$type\":\"boolean\","
+ "\"$value\":\"true\""
+ "},"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_c\""
+ "}"
+ "},"
+ "{"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row2_c\""
+ "},"
+ "\"column_b\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row2_b\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"true\","
+ "\"all_column_names\":["
+ "\"column_a\","
+ "\"column_b\""
+ "]"
+ "}";
+
+ // The reported written size must match the produced text byte for byte.
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// MaxSelectedColumnCount = 2: only the first two columns seen (column_a,
+// column_b) are emitted in rows, column_c is dropped everywhere and
+// "incomplete_columns" becomes "true"; "all_column_names" still lists all three.
+TEST_F(TWriterForWebJson, SliceColumnsByMaxCount)
+{
+ Config_->MaxSelectedColumnCount = 2;
+
+ CreateStandardWriter();
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", "row1_a"},
+ {"column_b", "row1_b"},
+ {"column_c", "row1_c"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row2_c"},
+ {"column_b", "row2_b"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row3_c"},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ // Wait for Close() and surface its error instead of discarding the future,
+ // as the other tests of this fixture do.
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"column_a\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_a\""
+ "},"
+ "\"column_b\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_b\""
+ "}"
+ "},"
+ "{"
+ "\"column_b\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row2_b\""
+ "}"
+ "},"
+ "{"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"true\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"column_a\","
+ "\"column_b\","
+ "\"column_c\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// FieldWeightLimit = 6: string values longer than six bytes are cut to their
+// first six bytes and marked with "$incomplete":true; shorter values are
+// emitted unchanged.
+TEST_F(TWriterForWebJson, SliceStrings)
+{
+ Config_->FieldWeightLimit = 6;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_b", "row1_b"},
+ {"column_c", "rooooow1_c"},
+ {"column_a", "row1_a"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row2_c"},
+ {"column_b", "rooow2_b"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row3_c"},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ // Wait for Close() and surface its error instead of discarding the future,
+ // as the other tests of this fixture do.
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"column_b\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_b\""
+ "},"
+ "\"column_c\":{"
+ "\"$incomplete\":true,"
+ "\"$type\":\"string\","
+ "\"$value\":\"rooooo\""
+ "},"
+ "\"column_a\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_a\""
+ "}"
+ "},"
+ "{"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row2_c\""
+ "},"
+ "\"column_b\":{"
+ "\"$incomplete\":true,"
+ "\"$type\":\"string\","
+ "\"$value\":\"rooow2\""
+ "}"
+ "},"
+ "{"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row3_c\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"column_a\","
+ "\"column_b\","
+ "\"column_c\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// FieldWeightLimit = 8 applied to `any` values: a small composite value
+// ({key=a}) is rendered as a nested object, while one exceeding the limit
+// ({key=aaaaaa}) is replaced by an entry with "$incomplete":true, type "any"
+// and an empty "$value".
+TEST_F(TWriterForWebJson, ReplaceAnyWithNull)
+{
+ Config_->FieldWeightLimit = 8;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_b", EValueType::Any, "{key=a}"},
+ {"column_c", "row1_c"},
+ {"column_a", "row1_a"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", EValueType::Any, "{key=aaaaaa}"},
+ {"column_b", "row2_b"},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row3_c"},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"column_b\":{"
+ "\"key\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"a\""
+ "}"
+ "},"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_c\""
+ "},"
+ "\"column_a\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row1_a\""
+ "}"
+ "},"
+ "{"
+ "\"column_c\":{"
+ "\"$incomplete\":true,"
+ "\"$type\":\"any\","
+ "\"$value\":\"\""
+ "},"
+ "\"column_b\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row2_b\""
+ "}"
+ "},"
+ "{"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"row3_c\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"column_a\","
+ "\"column_b\","
+ "\"column_c\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// SkipSystemColumns = false: system columns are emitted. In rows their names
+// carry a doubled "$$" prefix, while "all_column_names" lists them with a
+// single "$" and in sorted order.
+TEST_F(TWriterForWebJson, NotSkipSystemColumns)
+{
+ Config_->SkipSystemColumns = false;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {TString(TableIndexColumnName), 0},
+ {TString(RowIndexColumnName), 1},
+ {TString(TabletIndexColumnName), 2},
+ {ValueColumnName_, 3}
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"$$table_index\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"0\""
+ "},"
+ "\"$$row_index\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"1\""
+ "},"
+ "\"$$tablet_index\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"2\""
+ "},"
+ "\"value\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"3\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"$row_index\","
+ "\"$table_index\","
+ "\"$tablet_index\","
+ "\"value\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// SkipSystemColumns = true: the three system columns disappear both from the
+// rows and from "all_column_names"; only "value" remains.
+TEST_F(TWriterForWebJson, SkipSystemColumns)
+{
+ Config_->SkipSystemColumns = true;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {TString(TableIndexColumnName), 0},
+ {TString(RowIndexColumnName), 1},
+ {TString(TabletIndexColumnName), 2},
+ {ValueColumnName_, 3}
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"value\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"3\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"value\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// A system column explicitly listed in ColumnNames is emitted even though
+// SkipSystemColumns = true; the other system columns are still skipped.
+TEST_F(TWriterForWebJson, NotSkipRequestedSystemColumns)
+{
+ Config_->SkipSystemColumns = true;
+ Config_->ColumnNames = std::vector<std::string>{TabletIndexColumnName, ValueColumnName_};
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {TString(TableIndexColumnName), 0},
+ {TString(RowIndexColumnName), 1},
+ {TString(TabletIndexColumnName), 2},
+ {ValueColumnName_, 3}
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"$$tablet_index\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"2\""
+ "},"
+ "\"value\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"3\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"$tablet_index\","
+ "\"value\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// A value whose column id is not registered in the name table (id -1 here) is
+// skipped, producing an empty row object; once the column is registered the
+// same kind of value is emitted normally.
+TEST_F(TWriterForWebJson, SkipUnregisteredColumns)
+{
+ CreateStandardWriter();
+
+ TUnversionedRowBuilder row;
+ // -1 is not an id handed out by NameTable_.
+ int keyDId = -1;
+ row.AddValue(MakeUnversionedBooleanValue(true, keyDId));
+ std::vector<TUnversionedRow> rows = {row.GetRow()};
+
+ EXPECT_TRUE(Writer_->Write(rows));
+
+ keyDId = NameTable_->RegisterName("column_d");
+
+ rows.clear();
+ row.Reset();
+ row.AddValue(MakeUnversionedBooleanValue(true, keyDId));
+ rows.push_back(row.GetRow());
+
+ EXPECT_TRUE(Writer_->Write(rows));
+ // Wait for Close() and surface its error instead of discarding the future,
+ // as the other tests of this fixture do.
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "},"
+ "{"
+ "\"column_d\":{"
+ "\"$type\":\"boolean\","
+ "\"$value\":\"true\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"false\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"column_d\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// Columns are selected by name via ColumnNames. All three requested columns
+// are emitted although MaxSelectedColumnCount = 2, and the non-requested
+// column_a is dropped from the rows ("incomplete_columns" is "true") while
+// still being listed in "all_column_names".
+TEST_F(TWriterForWebJson, SliceColumnsByName)
+{
+ Config_->ColumnNames = {
+ "column_b",
+ "column_c",
+ "$tablet_index"};
+ Config_->MaxSelectedColumnCount = 2;
+ Config_->SkipSystemColumns = false;
+
+ CreateStandardWriter();
+
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", 100500u},
+ {"column_b", 0.42},
+ {"column_c", "abracadabra"},
+ {TString(TabletIndexColumnName), 10},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ WaitFor(Writer_->Close())
+ .ThrowOnError();
+ // NOTE(review): `result` is never read; presumably the call only serves as
+ // a check that the output parses as JSON — confirm before removing.
+ auto result = ParseJsonToNode(OutputStream_.Str());
+
+ TString expectedOutput =
+ "{"
+ "\"rows\":["
+ "{"
+ "\"column_b\":{"
+ "\"$type\":\"double\","
+ "\"$value\":\"0.42\""
+ "},"
+ "\"column_c\":{"
+ "\"$type\":\"string\","
+ "\"$value\":\"abracadabra\""
+ "},"
+ "\"$$tablet_index\":{"
+ "\"$type\":\"int64\","
+ "\"$value\":\"10\""
+ "}"
+ "}"
+ "],"
+ "\"incomplete_columns\":\"true\","
+ "\"incomplete_all_column_names\":\"false\","
+ "\"all_column_names\":["
+ "\"$tablet_index\","
+ "\"column_a\","
+ "\"column_b\","
+ "\"column_c\""
+ "]"
+ "}";
+
+ EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
+ EXPECT_EQ(expectedOutput, OutputStream_.Str());
+}
+
+// Checks the value part of a YQL-formatted cell against |expectedValue|.
+// Supported expectations:
+// - anything convertible to TString: the node must be a string equal to it;
+// - double: the node must be a string that parses to (approximately) it;
+// - bool: the node must be a boolean equal to it;
+// - INodePtr: the node must be structurally equal to it.
+// Any other expectation type is rejected at compile time.
+template <typename TValue>
+void CheckYqlValue(
+ const INodePtr& valueNode,
+ const TValue& expectedValue)
+{
+ using TExpected = std::decay_t<TValue>;
+ if constexpr (std::is_convertible_v<TExpected, TString>) {
+ ASSERT_EQ(valueNode->GetType(), ENodeType::String);
+ const auto actualString = valueNode->GetValue<TString>();
+ EXPECT_EQ(actualString, expectedValue);
+ } else if constexpr (std::is_same_v<TExpected, double>) {
+ ASSERT_EQ(valueNode->GetType(), ENodeType::String);
+ const auto actualString = valueNode->GetValue<TString>();
+ EXPECT_FLOAT_EQ(FromString<double>(actualString), expectedValue);
+ } else if constexpr (std::is_same_v<TExpected, bool>) {
+ ASSERT_EQ(valueNode->GetType(), ENodeType::Boolean);
+ const auto actualBool = valueNode->GetValue<bool>();
+ EXPECT_EQ(actualBool, expectedValue);
+ } else if constexpr (std::is_same_v<TExpected, INodePtr>) {
+ const bool nodesEqual = AreNodesEqual(valueNode, expectedValue);
+ EXPECT_TRUE(nodesEqual)
+ << "actualValueNode is " << ConvertToYsonString(valueNode, EYsonFormat::Pretty).AsStringBuf()
+ << "\nexpectedValue is " << ConvertToYsonString(expectedValue, EYsonFormat::Pretty).AsStringBuf();
+ } else {
+ static_assert(TDependentFalse<TExpected>, "Type not allowed");
+ }
+}
+
+// Checks the type part of a YQL-formatted cell. |typeNode| holds, as a string,
+// an index into |yqlTypes| (the parsed type registry); the referenced entry
+// must be a list node equal to |expectedType|, which is given either as a
+// Yson text or as a ready INodePtr.
+template <typename TType>
+void CheckYqlType(
+ const INodePtr& typeNode,
+ const TType& expectedType,
+ const std::vector<INodePtr>& yqlTypes)
+{
+ ASSERT_EQ(typeNode->GetType(), ENodeType::String);
+ auto typeIndex = FromString<int>(typeNode->GetValue<TString>());
+ ASSERT_LT(typeIndex, static_cast<int>(yqlTypes.size()));
+ ASSERT_GE(typeIndex, 0);
+ const auto& registeredType = yqlTypes[typeIndex];
+ EXPECT_EQ(registeredType->GetType(), ENodeType::List);
+
+ // Normalize the expectation to a node.
+ using TExpected = std::decay_t<TType>;
+ INodePtr expectedTypeNode;
+ if constexpr (std::is_convertible_v<TExpected, TString>) {
+ expectedTypeNode = ConvertToNode(TYsonString(TString(expectedType)));
+ } else if constexpr (std::is_same_v<TExpected, INodePtr>) {
+ expectedTypeNode = expectedType;
+ } else {
+ static_assert(TDependentFalse<TExpected>, "Type not allowed");
+ }
+
+ EXPECT_TRUE(AreNodesEqual(registeredType, expectedTypeNode))
+ << "yqlType is " << ConvertToYsonString(registeredType, EYsonFormat::Pretty).AsStringBuf()
+ << "\nexpectedTypeNode is " << ConvertToYsonString(expectedTypeNode, EYsonFormat::Pretty).AsStringBuf();
+}
+
+// Looks up column |name| in |row| (a map node) and checks that the cell is a
+// two-element list [value, typeIndex] matching |expectedValue| and
+// |expectedType|.
+template <typename TValue, typename TType>
+void CheckYqlTypeAndValue(
+ const INodePtr& row,
+ TStringBuf name,
+ const TType& expectedType,
+ const TValue& expectedValue,
+ const std::vector<INodePtr>& yqlTypes)
+{
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto cell = row->AsMap()->FindChild(TString(name));
+ ASSERT_TRUE(cell);
+ ASSERT_EQ(cell->GetType(), ENodeType::List);
+
+ auto cellItems = cell->AsList();
+ ASSERT_EQ(cellItems->GetChildCount(), 2);
+
+ // Element 0 is the value, element 1 is the type registry index.
+ CheckYqlValue(cellItems->GetChildOrThrow(0), expectedValue);
+ CheckYqlType(cellItems->GetChildOrThrow(1), expectedType, yqlTypes);
+}
+
+// Calls CheckYqlTypeAndValue under a SCOPED_TRACE carrying the column name, so
+// that a failure inside the helper is reported together with the column.
+#define CHECK_YQL_TYPE_AND_VALUE(row, name, expectedType, expectedValue, yqlTypes) \
+ do { \
+ SCOPED_TRACE(name); \
+ CheckYqlTypeAndValue(row, name, expectedType, expectedValue, yqlTypes); \
+ } while (0)
+
+TEST_F(TWriterForWebJson, YqlValueFormat_SimpleTypes)
+{
+ Config_->MaxAllColumnNamesCount = 2;
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+
+ // We will emulate writing rows from two tables.
+ CreateStandardWriter(std::vector{New<TTableSchema>(), New<TTableSchema>()});
+
+ {
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", 100500u},
+ {"column_b", true},
+ {"column_c", "row1_c"},
+ {TString(RowIndexColumnName), 0},
+ {TString(TableIndexColumnName), 0},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_c", "row2_c"},
+ {"column_b", "row2_b"},
+ {TString(RowIndexColumnName), 1},
+ {TString(TableIndexColumnName), 0},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_a", -100500},
+ {"column_b", EValueType::Any, "{x=2;y=3}"},
+ {"column_c", 2.71828},
+ {TString(RowIndexColumnName), 1},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
+ ASSERT_TRUE(incompleteColumns);
+ auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
+ ASSERT_TRUE(incompleteAllColumnNames);
+ auto allColumnNames = result->AsMap()->FindChild("all_column_names");
+ ASSERT_TRUE(allColumnNames);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(incompleteColumns->GetType(), ENodeType::String);
+ EXPECT_EQ(incompleteColumns->GetValue<TString>(), "false");
+
+ ASSERT_EQ(incompleteAllColumnNames->GetType(), ENodeType::String);
+ EXPECT_EQ(incompleteAllColumnNames->GetValue<TString>(), "true");
+
+ ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
+ std::vector<TString> allColumnNamesVector;
+ ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
+ EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_b"}));
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 3);
+
+ auto row1 = rows->AsList()->GetChildOrThrow(0);
+ auto row2 = rows->AsList()->GetChildOrThrow(1);
+ auto row3 = rows->AsList()->GetChildOrThrow(2);
+
+ ASSERT_EQ(row1->GetType(), ENodeType::Map);
+ EXPECT_EQ(row1->AsMap()->GetChildCount(), 3);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", R"(["DataType"; "Uint64"])", "100500", yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_b", R"(["DataType"; "Boolean"])", true, yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_c", R"(["DataType"; "String"])", "row1_c", yqlTypes);
+
+ ASSERT_EQ(row2->GetType(), ENodeType::Map);
+ EXPECT_EQ(row2->AsMap()->GetChildCount(), 2);
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_b", R"(["DataType"; "String"])", "row2_b", yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_c", R"(["DataType"; "String"])", "row2_c", yqlTypes);
+
+ ASSERT_EQ(row3->GetType(), ENodeType::Map);
+ EXPECT_EQ(row3->AsMap()->GetChildCount(), 3);
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_a", R"(["DataType"; "Int64"])", "-100500", yqlTypes);
+ auto row3BValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ val = {
+ x = {
+ "$type" = "int64";
+ "$value" = "2";
+ };
+ y = {
+ "$type" = "int64";
+ "$value" = "3";
+ }
+ }
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_b", R"(["DataType"; "Yson"])", row3BValue, yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_c", R"(["DataType"; "Double"])", 2.71828, yqlTypes);
+}
+
+TEST_F(TWriterForWebJson, ColumnNameEncoding)
+{
+ Config_->MaxAllColumnNamesCount = 2;
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+
+ CreateStandardWriter();
+
+ {
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", 100500u},
+ {"column_non_ascii_\xd0\x81", -100500},
+ }).Get()
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
+ ASSERT_TRUE(incompleteColumns);
+ auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
+ ASSERT_TRUE(incompleteAllColumnNames);
+ auto allColumnNames = result->AsMap()->FindChild("all_column_names");
+ ASSERT_TRUE(allColumnNames);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
+ std::vector<TString> allColumnNamesVector;
+ ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
+ EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_non_ascii_\xc3\x90\xc2\x81"}));
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
+
+ auto row1 = rows->AsList()->GetChildOrThrow(0);
+
+ ASSERT_EQ(row1->GetType(), ENodeType::Map);
+ EXPECT_EQ(row1->AsMap()->GetChildCount(), 2);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", R"(["DataType"; "Uint64"])", "100500", yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_non_ascii_\xc3\x90\xc2\x81", R"(["DataType"; "Int64"])", "-100500", yqlTypes);
+}
+
+TEST_F(TWriterForWebJson, YqlValueFormat_ComplexTypes)
+{
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+
+ auto firstSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"column_a", OptionalLogicalType(
+ ListLogicalType(MakeLogicalType(ESimpleLogicalValueType::Int64, true)))},
+ {"column_b", StructLogicalType({
+ {"key", MakeLogicalType(ESimpleLogicalValueType::String, true)},
+ {"value", MakeLogicalType(ESimpleLogicalValueType::String, true)},
+ {"variant_tuple", VariantTupleLogicalType({
+ MakeLogicalType(ESimpleLogicalValueType::Int8, true),
+ MakeLogicalType(ESimpleLogicalValueType::Boolean, false),
+ })},
+ {"variant_struct", VariantStructLogicalType({
+ {"a", MakeLogicalType(ESimpleLogicalValueType::Int8, true)},
+ {"b", MakeLogicalType(ESimpleLogicalValueType::Boolean, false)},
+ })},
+ {"dict", DictLogicalType(
+ SimpleLogicalType(ESimpleLogicalValueType::Int64),
+ SimpleLogicalType(ESimpleLogicalValueType::String)),
+ },
+ {"tagged", TaggedLogicalType(
+ "MyTag",
+ SimpleLogicalType(ESimpleLogicalValueType::Int64)),
+ },
+ {"timestamp", SimpleLogicalType(ESimpleLogicalValueType::Timestamp)},
+ {"date", SimpleLogicalType(ESimpleLogicalValueType::Date)},
+ {"datetime", SimpleLogicalType(ESimpleLogicalValueType::Datetime)},
+ {"interval", SimpleLogicalType(ESimpleLogicalValueType::Interval)},
+ {"date32", SimpleLogicalType(ESimpleLogicalValueType::Date32)},
+ {"datetime64", SimpleLogicalType(ESimpleLogicalValueType::Datetime64)},
+ {"timestamp64", SimpleLogicalType(ESimpleLogicalValueType::Timestamp64)},
+ {"interval64", SimpleLogicalType(ESimpleLogicalValueType::Interval64)},
+ {"json", SimpleLogicalType(ESimpleLogicalValueType::Json)},
+ {"float", SimpleLogicalType(ESimpleLogicalValueType::Float)},
+ })},
+ {"column_c", ListLogicalType(StructLogicalType({
+ {"very_optional_key", OptionalLogicalType(MakeLogicalType(ESimpleLogicalValueType::String, false))},
+ {"optional_value", MakeLogicalType(ESimpleLogicalValueType::String, false)},
+ }))},
+ });
+
+ auto secondSchema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"column_a", VariantTupleLogicalType({
+ SimpleLogicalType(ESimpleLogicalValueType::Null),
+ SimpleLogicalType(ESimpleLogicalValueType::Any),
+ })},
+ {"column_b", SimpleLogicalType(ESimpleLogicalValueType::Null)},
+ {"column_c", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Null))},
+ {"column_d", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
+ });
+
+ auto firstColumnAType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "OptionalType";
+ [
+ "ListType";
+ ["DataType"; "Int64"]
+ ]
+ ])")));
+ auto firstColumnBType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "StructType";
+ [
+ [
+ "key";
+ ["DataType"; "String"]
+ ];
+ [
+ "value";
+ ["DataType"; "String"]
+ ];
+ [
+ "variant_tuple";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ ["DataType"; "Int8"];
+ [
+ "OptionalType";
+ ["DataType"; "Boolean"]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "variant_struct";
+ [
+ "VariantType";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ ["DataType"; "Int8"]
+ ];
+ [
+ "b";
+ [
+ "OptionalType";
+ ["DataType"; "Boolean"]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "dict";
+ [
+ "DictType";
+ ["DataType"; "Int64"];
+ ["DataType"; "String"]
+ ]
+ ];
+ [
+ "tagged";
+ [
+ "TaggedType";
+ "MyTag";
+ ["DataType"; "Int64"]
+ ]
+ ];
+ [
+ "timestamp";
+ ["DataType"; "Timestamp"]
+ ];
+ [
+ "date";
+ ["DataType"; "Date"]
+ ];
+ [
+ "datetime";
+ ["DataType"; "Datetime"]
+ ];
+ [
+ "interval";
+ ["DataType"; "Interval"]
+ ];
+ [
+ "date32";
+ ["DataType"; "Date32"]
+ ];
+ [
+ "datetime64";
+ ["DataType"; "Datetime64"]
+ ];
+ [
+ "timestamp64";
+ ["DataType"; "Timestamp64"]
+ ];
+ [
+ "interval64";
+ ["DataType"; "Interval64"]
+ ];
+ [
+ "json";
+ ["DataType"; "Json"]
+ ];
+ [
+ "float";
+ ["DataType"; "Float"]
+ ];
+ ]
+ ])")));
+ auto firstColumnCType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "very_optional_key";
+ [
+ "OptionalType";
+ [
+ "OptionalType";
+ ["DataType"; "String"]
+ ]
+ ]
+ ];
+ [
+ "optional_value";
+ [
+ "OptionalType";
+ ["DataType"; "String"]
+ ]
+ ]
+ ]
+ ]
+ ])")));
+ auto secondColumnAType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "VariantType";
+ [
+ "TupleType";
+ [
+ ["NullType"];
+ ["DataType"; "Yson"];
+ ]
+ ]
+ ])")));
+ auto secondColumnBType = ConvertToNode(TYsonString(TStringBuf(R"(["NullType"])")));
+ auto secondColumnCType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "OptionalType";
+ [
+ "NullType";
+ ]
+ ])")));
+ auto secondColumnDType = ConvertToNode(TYsonString(TStringBuf(R"([
+ "OptionalType";
+ ["DataType"; "Int64"]
+ ])")));
+
+ CreateStandardWriter(std::vector{firstSchema, secondSchema});
+ {
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {"column_a", EValueType::Composite, R"([-1; -2; -5])"},
+ {
+ "column_b",
+ EValueType::Composite,
+ R"([
+ "key";
+ "value";
+ [0; 7];
+ [1; #];
+ [[1; "a"]; [2; "b"]];
+ 99;
+ 100u;
+ 101u;
+ 102u;
+ 103;
+ -42;
+ 42;
+ -42;
+ -1;
+ "[\"a\", {\"b\": 42}]";
+ -3.25;
+ ])",
+ },
+ {"column_c", EValueType::Composite, R"([[[#]; "value"]; [["key"]; #]])"},
+ {"column_d", -49},
+ {TString(TableIndexColumnName), 0},
+ {TString(RowIndexColumnName), 0},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_a", EValueType::Composite, R"([0; -2; -5; 177])"},
+ {
+ "column_b",
+ EValueType::Composite,
+ R"([
+ "key1";
+ "value1";
+ [1; %false];
+ [1; #];
+ [];
+ 199;
+ 0u;
+ 1101u;
+ 1102u;
+ 1103;
+ 123;
+ -123;
+ 123;
+ 123;
+ "null";
+ 0.0;
+ ])",
+ },
+ {"column_c", EValueType::Composite, R"([[#; #]; [["key1"]; #]])"},
+ {"column_d", 49u},
+ {TString(RowIndexColumnName), 1},
+ }).Get(),
+ MakeRow(NameTable_, {
+ {"column_a", EValueType::Composite, "[]"},
+ {
+ "column_b",
+ EValueType::Composite,
+ R"([
+ "key2";
+ "value2";
+ [0; 127];
+ [1; %true];
+ [[0; ""]];
+ 399;
+ 30u;
+ 3101u;
+ 3202u;
+ 3103;
+ -53375809;
+ -4611669897600;
+ -4611669897600000000;
+ -9223339708799999999;
+ "{\"x\": false}";
+ 1e10;
+ ])"
+ },
+ {"column_c", EValueType::Composite, "[[[key]; #]]"},
+ {"column_d", "49"},
+ {TString(RowIndexColumnName), 2},
+ }).Get(),
+
+ MakeRow(NameTable_, {
+ {"column_a", nullptr},
+ {
+ "column_b",
+ EValueType::Composite,
+ // First string is valid UTF-8, the second one should be Base64 encoded.
+ "["
+ "\"\xC3\xBF\";"
+ "\"\xFA\xFB\xFC\xFD\";"
+ R"(
+ [0; 127];
+ [1; %true];
+ [[-1; "-1"]; [0; ""]];
+ 499;
+ 40u;
+ 4101u;
+ 4202u;
+ 4103;
+ 53375807;
+ 4611669811199;
+ 4611669811199999999;
+ 9223339708799999999;
+ "{}";
+ -2.125;
+ ])",
+ },
+ {"column_c", EValueType::Composite, "[]"},
+ {"column_d", EValueType::Any, "{x=49}"},
+ {TString(RowIndexColumnName), 3},
+ }).Get(),
+
+ // Here come rows from the second table.
+ MakeRow(NameTable_, {
+ {"column_a", EValueType::Composite, "[0; #]"},
+ {"column_b", nullptr},
+ {"column_c", nullptr},
+ {"column_d", -49},
+ {TString(TableIndexColumnName), 1},
+ {TString(RowIndexColumnName), 0},
+ }).Get(),
+
+ MakeRow(NameTable_, {
+ {"column_a", EValueType::Composite, "[1; {z=z}]"},
+ {"column_b", nullptr},
+ {"column_c", EValueType::Composite, "[#]"},
+ {"column_d", nullptr},
+ {TString(TableIndexColumnName), 1},
+ {TString(RowIndexColumnName), 1},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
+ ASSERT_TRUE(incompleteColumns);
+ auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
+ ASSERT_TRUE(incompleteAllColumnNames);
+ auto allColumnNames = result->AsMap()->FindChild("all_column_names");
+ ASSERT_TRUE(allColumnNames);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(incompleteColumns->GetType(), ENodeType::String);
+ EXPECT_EQ(incompleteColumns->GetValue<TString>(), "false");
+
+ ASSERT_EQ(incompleteAllColumnNames->GetType(), ENodeType::String);
+ EXPECT_EQ(incompleteAllColumnNames->GetValue<TString>(), "false");
+
+ ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
+ std::vector<TString> allColumnNamesVector;
+ ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
+ EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_b", "column_c", "column_d"}));
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 6);
+
+ auto row1 = rows->AsList()->GetChildOrThrow(0);
+ auto row2 = rows->AsList()->GetChildOrThrow(1);
+ auto row3 = rows->AsList()->GetChildOrThrow(2);
+ auto row4 = rows->AsList()->GetChildOrThrow(3);
+ auto row5 = rows->AsList()->GetChildOrThrow(4);
+ auto row6 = rows->AsList()->GetChildOrThrow(5);
+
+ ASSERT_EQ(row1->GetType(), ENodeType::Map);
+ EXPECT_EQ(row1->AsMap()->GetChildCount(), 4);
+ auto row1AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=["-1"; "-2"; "-5"]}])")));
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", firstColumnAType, row1AValue, yqlTypes);
+ auto row1BValue = ConvertToNode(TYsonString(TStringBuf(
+ R"([
+ "key";
+ "value";
+ ["0"; "7"];
+ ["1"; #];
+ {"val"=[["1"; "a"]; ["2"; "b"]]};
+ "99";
+ "100";
+ "101";
+ "102";
+ "103";
+ "-42";
+ "42";
+ "-42";
+ "-1";
+ "[\"a\", {\"b\": 42}]";
+ "-3.25";
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_b", firstColumnBType, row1BValue, yqlTypes);
+ auto row1CValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ "val"=[
+ [[#]; ["value"]];
+ [[["key"]]; #]
+ ]
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_c", firstColumnCType, row1CValue, yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row1, "column_d", R"(["DataType"; "Int64"])", "-49", yqlTypes);
+
+ ASSERT_EQ(row2->GetType(), ENodeType::Map);
+ EXPECT_EQ(row2->AsMap()->GetChildCount(), 4);
+ auto row2AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=["0"; "-2"; "-5"; "177"]}])")));
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_a", firstColumnAType, row2AValue, yqlTypes);
+ auto row2BValue = ConvertToNode(TYsonString(TStringBuf(
+ R"([
+ "key1";
+ "value1";
+ ["1"; [%false]];
+ ["1"; #];
+ {"val"=[]};
+ "199";
+ "0";
+ "1101";
+ "1102";
+ "1103";
+ "123";
+ "-123";
+ "123";
+ "123";
+ "null";
+ "0";
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_b", firstColumnBType, row2BValue, yqlTypes);
+ auto row2CValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ "val"=[
+ [#; #];
+ [[["key1"]]; #]
+ ]
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_c", firstColumnCType, row2CValue, yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row2, "column_d", R"(["DataType"; "Uint64"])", "49", yqlTypes);
+
+ ASSERT_EQ(row3->GetType(), ENodeType::Map);
+ EXPECT_EQ(row3->AsMap()->GetChildCount(), 4);
+ auto row3AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=[]}])")));
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_a", firstColumnAType, row3AValue, yqlTypes);
+ auto row3BValue = ConvertToNode(TYsonString(TStringBuf(
+ R"([
+ "key2";
+ "value2";
+ ["0"; "127"];
+ ["1"; [%true]];
+ {"val"=[["0"; ""]]};
+ "399";
+ "30";
+ "3101";
+ "3202";
+ "3103";
+ "-53375809";
+ "-4611669897600";
+ "-4611669897600000000";
+ "-9223339708799999999";
+ "{\"x\": false}";
+ "10000000000";
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_b", firstColumnBType, row3BValue, yqlTypes);
+ auto row3CValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ "val"=[
+ [[["key"]]; #]
+ ]
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_c", firstColumnCType, row3CValue, yqlTypes);
+ CHECK_YQL_TYPE_AND_VALUE(row3, "column_d", R"(["DataType"; "String"])", "49", yqlTypes);
+
+ ASSERT_EQ(row4->GetType(), ENodeType::Map);
+ EXPECT_EQ(row4->AsMap()->GetChildCount(), 4);
+ auto row4AValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
+ CHECK_YQL_TYPE_AND_VALUE(row4, "column_a", firstColumnAType, row4AValue, yqlTypes);
+
+ auto row4BValue = ConvertToNode(TYsonString(TStringBuf(
+ "["
+ "\"\xC3\xBF\";"
+ R"(
+ {"b64" = %true; "val" = "+vv8/Q=="};
+ ["0"; "127"];
+ ["1"; [%true]];
+ {"val"=[["-1"; "-1"]; ["0"; ""]]};
+ "499";
+ "40";
+ "4101";
+ "4202";
+ "4103";
+ "53375807";
+ "4611669811199";
+ "4611669811199999999";
+ "9223339708799999999";
+ "{}";
+ "-2.125";
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row4, "column_b", firstColumnBType, row4BValue, yqlTypes);
+
+ auto row4CValue = ConvertToNode(TYsonString(TStringBuf(R"({"val"=[]})")));
+ CHECK_YQL_TYPE_AND_VALUE(row4, "column_c", firstColumnCType, row4CValue, yqlTypes);
+ auto row4DValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ val = {
+ x = {
+ "$type" = "int64";
+ "$value" = "49";
+ }
+ }
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row4, "column_d", R"(["DataType"; "Yson"])", row4DValue, yqlTypes);
+
+ // The remaining rows must come from the second table.
+
+ ASSERT_EQ(row5->GetType(), ENodeType::Map);
+ EXPECT_EQ(row5->AsMap()->GetChildCount(), 4);
+ auto row5AValue = ConvertToNode(TYsonString(TStringBuf(R"(["0"; #])")));
+ CHECK_YQL_TYPE_AND_VALUE(row5, "column_a", secondColumnAType, row5AValue, yqlTypes);
+ auto row5BValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
+ CHECK_YQL_TYPE_AND_VALUE(row5, "column_b", secondColumnBType, row5BValue, yqlTypes);
+ auto row5CValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
+ CHECK_YQL_TYPE_AND_VALUE(row5, "column_c", secondColumnCType, row5CValue, yqlTypes);
+ auto row5DValue = ConvertToNode(TYsonString(TStringBuf(R"(["-49"])")));
+ CHECK_YQL_TYPE_AND_VALUE(row5, "column_d", secondColumnDType, row5DValue, yqlTypes);
+
+ ASSERT_EQ(row6->GetType(), ENodeType::Map);
+ EXPECT_EQ(row6->AsMap()->GetChildCount(), 4);
+ auto row6AValue = ConvertToNode(TYsonString(TStringBuf(R"([
+ "1";
+ {
+ val = {
+ z = {
+ "$type" = "string";
+ "$value" = "z";
+ }
+ }
+ };
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row6, "column_a", secondColumnAType, row6AValue, yqlTypes);
+ auto row6BValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
+ CHECK_YQL_TYPE_AND_VALUE(row6, "column_b", secondColumnBType, row6BValue, yqlTypes);
+ auto row6CValue = ConvertToNode(TYsonString(TStringBuf(R"([#])")));
+ CHECK_YQL_TYPE_AND_VALUE(row6, "column_c", secondColumnCType, row6CValue, yqlTypes);
+ auto row6DValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
+ CHECK_YQL_TYPE_AND_VALUE(row6, "column_d", secondColumnDType, row6DValue, yqlTypes);
+}
+
+TEST_F(TWriterForWebJson, YqlValueFormat_Incomplete)
+{
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+ Config_->FieldWeightLimit = 215;
+ Config_->StringWeightLimit = 10;
+
+ auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"column_a", StructLogicalType({
+ {"field1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
+ {"list", ListLogicalType(
+ VariantStructLogicalType({
+ {"a", DictLogicalType(
+ SimpleLogicalType(ESimpleLogicalValueType::Int64),
+ SimpleLogicalType(ESimpleLogicalValueType::String)),
+ },
+ {"b", SimpleLogicalType(ESimpleLogicalValueType::Any)},
+ })),
+ },
+ {"field2", SimpleLogicalType(ESimpleLogicalValueType::String)},
+ {"field3", MakeLogicalType(ESimpleLogicalValueType::Int64, false)},
+ })},
+ {"column_b", SimpleLogicalType(ESimpleLogicalValueType::Any)},
+ {"column_c", MakeLogicalType(ESimpleLogicalValueType::String, false)},
+ });
+
+ auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"([
+ "StructType";
+ [
+ [
+ "field1";
+ ["DataType"; "Int64"]
+ ];
+ [
+ "list";
+ [
+ "ListType";
+ [
+ "VariantType";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "DictType";
+ ["DataType"; "Int64"];
+ ["DataType"; "String"]
+ ]
+ ];
+ [
+ "b";
+ ["DataType"; "Yson"]
+ ];
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "field2";
+ ["DataType"; "String"]
+ ];
+ [
+ "field3";
+ [
+ "OptionalType";
+ ["DataType"; "Int64"]
+ ]
+ ];
+ ]
+ ])")));
+
+ auto yqlTypeB = ConvertToNode(TYsonString(TStringBuf(R"(["DataType"; "Yson"])")));
+ auto yqlTypeC = ConvertToNode(TYsonString(TStringBuf(R"(["OptionalType"; ["DataType"; "String"]])")));
+ {
+ CreateStandardWriter({schema});
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {
+ {
+ "column_a",
+ EValueType::Composite,
+ R"([
+ -1;
+ [
+ [
+ 0;
+ [
+ [-2; "UTF:)" + TString("\xF0\x90\x8D\x88") + "\xF0\x90\x8D\x88" + R"("];
+ [2; "!UTF:)" + TString("\xFA\xFB\xFC\xFD\xFA\xFB\xFC\xFD") + R"("];
+ [0; ""];
+ ]
+ ];
+ [
+ 1;
+ "{kinda_long_key = kinda_even_longer_value}"
+ ];
+ [
+ 0;
+ [
+ [0; "One more quite long string"];
+ [1; "One more quite long string"];
+ [2; "One more quite long string"];
+ [3; "One more quite long string"];
+ [4; "One more quite long string"];
+ [5; "One more quite long string"];
+ ]
+ ];
+ [
+ 1;
+ "{kinda_long_key = kinda_even_longer_value}"
+ ];
+ ];
+ "I'm short";
+ 424242238133245
+ ])"
+ },
+ {"column_b", EValueType::Any, "{kinda_long_key = kinda_even_longer_value}"},
+ {"column_c", "One more quite long string"},
+ }).Get(),
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
+
+ auto row = rows->AsList()->GetChildOrThrow(0);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ EXPECT_EQ(row->AsMap()->GetChildCount(), 3);
+
+ auto rowAValue = ConvertToNode(TYsonString(R"([
+ "-1";
+ {
+ "inc" = %true;
+ "val" = [
+ [
+ "0";
+ {
+ "val" = [
+ ["-2"; {"inc"=%true; "val"="UTF:)" + TString("\xF0\x90\x8D\x88") + R"("}];
+ ["2"; {"inc"=%true; "b64"=%true; "val"="IVVURjr6"}];
+ ["0"; ""];
+ ]
+ }
+ ];
+ [
+ "1";
+ {"val"=""; "inc"=%true}
+ ];
+ [
+ "0";
+ {
+ "inc" = %true;
+ "val" = [
+ ["0"; {"val"="One more q"; "inc"=%true}];
+ ["1"; {"val"="One more "; "inc"=%true}];
+ ];
+ }
+ ];
+ ];
+ };
+ {
+ "val" = "";
+ "inc" = %true;
+ };
+ ["424242238133245"];
+ ])"));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+
+ // Simple values are not truncated to |StringWeightLimit|.
+ auto rowBValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ val = {
+ kinda_long_key = {
+ "$type" = "string";
+ "$value" = kinda_even_longer_value;
+ }
+ }
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_b", yqlTypeB, rowBValue, yqlTypes);
+ auto rowCValue = ConvertToNode(TYsonString(TStringBuf(R"(["One more quite long string"])")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_c", yqlTypeC, rowCValue, yqlTypes);
+}
+
+
+TEST_F(TWriterForWebJson, YqlValueFormat_Any)
+{
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+
+ auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
+ {"column_a", MakeLogicalType(ESimpleLogicalValueType::Any, false)},
+ });
+
+ auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"([
+ "OptionalType";
+ ["DataType"; "Yson"]
+ ])")));
+
+ CreateStandardWriter({schema});
+ {
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {{"column_a", EValueType::Any, "{x=y;z=2}"}}).Get(),
+ MakeRow(NameTable_, {{"column_a", true}}).Get(),
+ MakeRow(NameTable_, {{"column_a", -42}}).Get(),
+ MakeRow(NameTable_, {{"column_a", 42u}}).Get(),
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 4);
+
+ {
+ auto row = rows->AsList()->GetChildOrThrow(0);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
+ {
+ val = {
+ x = {
+ "$type" = "string";
+ "$value" = "y";
+ };
+ z = {
+ "$type" = "int64";
+ "$value" = "2";
+ }
+ }
+ }
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+ }
+ {
+ auto row = rows->AsList()->GetChildOrThrow(1);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
+ {
+ val = {
+ "$type" = "boolean";
+ "$value" = "true";
+ }
+ }
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+ }
+ {
+ auto row = rows->AsList()->GetChildOrThrow(2);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
+ {
+ val = {
+ "$type" = "int64";
+ "$value" = "-42";
+ }
+ }
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+ }
+ {
+ auto row = rows->AsList()->GetChildOrThrow(3);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
+ {
+ val = {
+ "$type" = "uint64";
+ "$value" = "42";
+ }
+ }
+ ])")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+ }
+}
+
+TEST_F(TWriterForWebJson, YqlValueFormat_CompositeNoSchema)
+{
+ Config_->ValueFormat = EWebJsonValueFormat::Yql;
+
+ auto schema = New<TTableSchema>();
+
+ auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"(["DataType"; "Yson"])")));
+
+ CreateStandardWriter({schema});
+ {
+ bool written = Writer_->Write({
+ MakeRow(NameTable_, {{"column_a", EValueType::Composite, "[1;2]"}}).Get(),
+ });
+ EXPECT_TRUE(written);
+ Writer_->Close().Get().ThrowOnError();
+ }
+
+ auto result = ParseJsonToNode(OutputStream_.Str());
+ ASSERT_EQ(result->GetType(), ENodeType::Map);
+
+ auto rows = result->AsMap()->FindChild("rows");
+ ASSERT_TRUE(rows);
+ auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
+ ASSERT_TRUE(yqlTypeRegistry);
+
+ ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
+ auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
+
+ ASSERT_EQ(rows->GetType(), ENodeType::List);
+ ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
+
+ {
+ auto row = rows->AsList()->GetChildOrThrow(0);
+ ASSERT_EQ(row->GetType(), ENodeType::Map);
+ auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"({
+ "val" = [
+ {
+ "$type" = "int64";
+ "$value" = "1";
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2";
+ }
+ ]
+ })")));
+ CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/ya.make b/yt/yt/library/formats/unittests/ya.make
new file mode 100644
index 0000000000..f080e66dc7
--- /dev/null
+++ b/yt/yt/library/formats/unittests/ya.make
@@ -0,0 +1,53 @@
+GTEST(unittester-formats)
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+PROTO_NAMESPACE(yt)
+
+SRCS(
+ protobuf_format_ut.proto
+
+ arrow_parser_ut.cpp
+ dsv_parser_ut.cpp
+ dsv_writer_ut.cpp
+ protobuf_format_ut.cpp
+ row_helpers.cpp
+ schemaful_dsv_parser_ut.cpp
+ schemaful_dsv_writer_ut.cpp
+ skiff_format_ut.cpp
+ skiff_yson_converter_ut.cpp
+ value_examples.cpp
+ web_json_writer_ut.cpp
+ yamred_dsv_parser_ut.cpp
+ yamred_dsv_writer_ut.cpp
+ yaml_parser_ut.cpp
+ yaml_writer_ut.cpp
+ yamr_parser_ut.cpp
+ yamr_writer_ut.cpp
+ yson_helpers.cpp
+)
+
+INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
+
+PEERDIR(
+ yt/yt/build
+ yt/yt/core/test_framework
+ yt/yt/core
+ yt/yt/client
+ yt/yt/client/formats
+ yt/yt/library/formats
+ yt/yt/library/named_value
+
+ contrib/libs/apache/arrow
+)
+
+RESOURCE(
+ ${ARCADIA_ROOT}/library/cpp/type_info/ut/test-data/good-types.txt /types/good
+ ${ARCADIA_ROOT}/library/cpp/type_info/ut/test-data/bad-types.txt /types/bad
+)
+
+SIZE(MEDIUM)
+
+REQUIREMENTS(ram:12)
+
+END()
diff --git a/yt/yt/library/formats/unittests/yaml_parser_ut.cpp b/yt/yt/library/formats/unittests/yaml_parser_ut.cpp
new file mode 100644
index 0000000000..95b9898360
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yaml_parser_ut.cpp
@@ -0,0 +1,598 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/client/formats/parser.h>
+
+#include <yt/yt/client/formats/config.h>
+
+#include <yt/yt/library/formats/yaml_parser.h>
+
+#include <yt/yt/core/yson/writer.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+
+////////////////////////////////////////////////////////////////////////////
+
+TString ParseYaml(const TString& yaml, EYsonType ysonType)
+{
+ TStringStream inputStream(yaml);
+ TStringStream outputStream;
+ TYsonWriter writer(&outputStream, EYsonFormat::Pretty, ysonType);
+ auto config = New<TYamlFormatConfig>();
+ ParseYaml(&inputStream, &writer, config, ysonType);
+ return outputStream.Str();
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamlParserTest, Simple)
+{
+ TString yaml = R"(
+hello)";
+ // Here and in the rest of the tests we introduce an extra leading \n for better readability, which we later
+ // strip off in the comparison.
+ TString expectedYson = R"(
+"hello")";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Integers)
+{
+ TString yaml = R"(
+a: 1
+b: -1
+# Hex and oct
+c: 0xDeAdBeEf
+d: 0o42
+# Various non-normalized forms of numbers
+e: -000
+f: +0
+g: +42
+# Would be oct in YAML 1.1, but not in YAML 1.2!
+h: 0042
+i: -018
+# 2^63, should be unsigned
+j: 9223372036854775808
+# 2^64 - 1, should be unsigned
+k: 18446744073709551615
+l: -9223372036854775808
+m: !yt/uint64 1234
+n: !!int 23
+o: !!int -15)";
+ TString expectedYson = R"(
+{
+ "a" = 1;
+ "b" = -1;
+ "c" = 3735928559u;
+ "d" = 34u;
+ "e" = 0;
+ "f" = 0;
+ "g" = 42;
+ "h" = 42;
+ "i" = -18;
+ "j" = 9223372036854775808u;
+ "k" = 18446744073709551615u;
+ "l" = -9223372036854775808;
+ "m" = 1234u;
+ "n" = 23;
+ "o" = -15;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+
+ std::vector<TString> invalidYamls = {
+ "!!int -0x42",
+ "!!int -0o23",
+ "!!int deadbeef",
+ "!!int 18446744073709551616", // 2^64, does not fit into uint64.
+ "!!int -9223372036854775809", // -2^63 - 1, does not fit into int64.
+ "!yt/uint64 -1", // Negative value under an unsigned tag.
+ "!yt/uint64 18446744073709551616", // 2^64, does not fit into uint64.
+ "!!int 0x",
+ // Examples below were integers in YAML 1.1, but not in YAML 1.2.
+ "!!int 123_456",
+ "!!int 190:20:30",
+ "!!int 0b1001",
+ "!!int \"\"",
+ };
+ for (const auto& yaml : invalidYamls) {
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(ParseYaml(yaml, EYsonType::Node), std::exception, "is not an integer or does not fit")
+ << "For YAML: " << yaml << std::endl;
+ }
+}
+
+TEST(TYamlParserTest, Floats)
+{
+ TString yaml = R"(
+a: 1.
+b: .2
+c: +3.14
+d: -2.17
+e: .inf
+f: -.Inf
+g: +.INF
+h: .nan
+i: .NaN
+j: .NAN
+k: !!float 42
+l: 1e2
+m: 1e+2
+n: 1e-2
+)";
+ TString expectedYson = R"(
+{
+ "a" = 1.;
+ "b" = 0.2;
+ "c" = 3.14;
+ "d" = -2.17;
+ "e" = %inf;
+ "f" = %-inf;
+ "g" = %inf;
+ "h" = %nan;
+ "i" = %nan;
+ "j" = %nan;
+ "k" = 42.;
+ "l" = 100.;
+ "m" = 100.;
+ "n" = 0.01;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+
+ std::vector<TString> invalidYamls = {
+ "!!float 0o23",
+ "!!float 1e",
+ "!!float 1e+",
+ "!!float 1e-",
+ "!!float 1e-2.3",
+ "!!float 1e2.3",
+ // Examples below were floats in YAML 1.1, but not in YAML 1.2.
+ "!!float 123_456",
+ "!!float 190:20:30.15",
+ "!!float inf",
+ "!!float .InF",
+ "!!float -+42.0",
+ "!!float .",
+ // For some reason arcadian FloatToString parses this, but it feels excessive to ban that
+ // despite not satisfying the regexp from the spec.
+ // "!!float 0x42",
+ };
+ for (const auto& yaml : invalidYamls) {
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(ParseYaml(yaml, EYsonType::Node), std::exception, "is not a floating point")
+ << "For YAML: " << yaml << std::endl;
+ }
+}
+
+TEST(TYamlParserTest, Booleans)
+{
+ TString yaml = R"(
+a: true
+b: false
+c: True
+d: False
+e: TRUE
+f: FALSE
+g: !!bool true
+)";
+ TString expectedYson = R"(
+{
+ "a" = %true;
+ "b" = %false;
+ "c" = %true;
+ "d" = %false;
+ "e" = %true;
+ "f" = %false;
+ "g" = %true;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+
+ std::vector<TString> invalidYamls = {
+ "!!bool 1",
+ "!!bool 0",
+ // Examples below were booleans in YAML 1.1, but not in YAML 1.2.
+ "!!bool yes",
+ "!!bool no",
+ "!!bool on",
+ "!!bool off",
+ "!!bool y",
+ "!!bool n",
+ "!!bool \"\"",
+ };
+ for (const auto& yaml : invalidYamls) {
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(ParseYaml(yaml, EYsonType::Node), std::exception, "is not a boolean")
+ << "For YAML: " << yaml << std::endl;
+ }
+}
+
+TEST(TYamlParserTest, Nulls)
+{
+ TString yaml = R"(
+a: null
+b: Null
+c: NULL
+d: ~
+e:
+f: !!null null
+# This is not allowed by a regexp in a spec, but feels excessive to ban.
+g: !!null foo
+)";
+ TString expectedYson = R"(
+{
+ "a" = #;
+ "b" = #;
+ "c" = #;
+ "d" = #;
+ "e" = #;
+ "f" = #;
+ "g" = #;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Strings)
+{
+ TString yaml = R"(
+a: "hello"
+b: 'world'
+c: of
+d: !!str warcraft
+e: !!str 42
+f: !!str ~
+g: ! hello
+)";
+ TString expectedYson = R"(
+{
+ "a" = "hello";
+ "b" = "world";
+ "c" = "of";
+ "d" = "warcraft";
+ "e" = "42";
+ "f" = "~";
+ "g" = "hello";
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Mappings)
+{
+ TString yaml = R"(
+a:
+ x: 1
+ y:
+ foo: bar
+ bar: foo
+42:
+ z: 3
+c: {}
+)";
+ TString expectedYson = R"(
+{
+ "a" = {
+ "x" = 1;
+ "y" = {
+ "foo" = "bar";
+ "bar" = "foo";
+ };
+ };
+ "42" = {
+ "z" = 3;
+ };
+ "c" = {};
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Sequences)
+{
+ TString yaml = R"(
+- foo
+- - 1
+ - 2
+ - 3
+- bar
+- []
+- - - - null
+)";
+ TString expectedYson = R"(
+[
+ "foo";
+ [
+ 1;
+ 2;
+ 3;
+ ];
+ "bar";
+ [];
+ [
+ [
+ [
+ #;
+ ];
+ ];
+ ];
+])";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Attributes)
+{
+ TString yaml = R"(
+!yt/attrnode
+- x: 1
+ y: 2
+- a: !yt/attrnode
+ - {}
+ - 42
+ b: !yt/attrnode
+ - x: null
+ - - 1
+ - 2
+ - 3
+ c: !yt/attrnode
+ - foo: 1
+ - null
+)";
+ // Compact form of the expected YSON below: <x=1;y=2>{a=<>42; b=<x=#>[1;2;3]; c=<foo=1>#;}
+ TString expectedYson = R"(
+<
+ "x" = 1;
+ "y" = 2;
+> {
+ "a" = <> 42;
+ "b" = <
+ "x" = #;
+ > [
+ 1;
+ 2;
+ 3;
+ ];
+ "c" = <
+ "foo" = 1;
+ > #;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+
+ std::vector<std::pair<TString, TString>> invalidYamlsAndErrors = {
+ {R"(
+!yt/attrnode
+- x: 1
+)", "Unexpected event type \"sequence_end\""},
+ {R"(
+!yt/attrnode
+- foo
+- bar
+)", "Unexpected event type \"scalar\""},
+ {R"(
+!yt/attrnode
+- x: 1
+- y: 2
+- z: 3
+)", "Unexpected event type \"mapping_start\""},
+};
+ for (const auto& [yaml, error] : invalidYamlsAndErrors) {
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(ParseYaml(yaml, EYsonType::Node), std::exception, error)
+ << "For YAML: " << yaml << std::endl;
+ }
+}
+
+TEST(TYamlParserTest, MultiDocument)
+{
+ TString yaml = R"(
+a: 1
+---
+foo
+---
+~
+---
+)";
+ TString expectedYson = R"(
+{
+ "a" = 1;
+};
+"foo";
+#;
+#;
+)";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::ListFragment), expectedYson.substr(1));
+}
+
+TEST(TYamlParserTest, Anchors)
+{
+ TString yaml = R"(
+a: &foo 1
+b: *foo
+c: &bar
+ x: &baz
+ - False
+ - &qux True
+ y: 2
+ z: *baz
+ t: *foo
+ w: *qux
+d: *bar
+e: *baz
+f: *foo
+g: *qux
+)";
+ TString expectedYson = R"(
+{
+ "a" = 1;
+ "b" = 1;
+ "c" = {
+ "x" = [
+ %false;
+ %true;
+ ];
+ "y" = 2;
+ "z" = [
+ %false;
+ %true;
+ ];
+ "t" = 1;
+ "w" = %true;
+ };
+ "d" = {
+ "x" = [
+ %false;
+ %true;
+ ];
+ "y" = 2;
+ "z" = [
+ %false;
+ %true;
+ ];
+ "t" = 1;
+ "w" = %true;
+ };
+ "e" = [
+ %false;
+ %true;
+ ];
+ "f" = 1;
+ "g" = %true;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+
+ std::vector<std::pair<TString, TString>> invalidYamlsAndErrors = {
+ {R"(
+a: *foo
+)", "undefined or unfinished anchor"},
+ {R"(
+- &foo a
+- &foo b
+)", "already defined"},
+ {R"(
+a: &foo
+- b: &foo
+ - c
+)", "already defined"},
+ {R"(
+a: &foo
+ bar: *foo
+)", "undefined or unfinished anchor"},
+ {R"(
+a: &foo bar
+*foo: baz
+)", "alias as a map key is not supported"},
+ {R"(
+&foo a: b
+)", "anchors on map keys is not supported"},
+ };
+ for (const auto& [yaml, error] : invalidYamlsAndErrors) {
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(ParseYaml(yaml, EYsonType::Node), std::exception, error)
+ << "For YAML: " << yaml << std::endl;
+ }
+}
+
+TEST(TYamlParserTest, Empty)
+{
+ TString yaml = "";
+ TString expectedYson = "";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::ListFragment), expectedYson);
+}
+
+//! There is a reverse test in yaml_writer_ut.cpp.
+TEST(TYamlParserTest, RealExample)
+{
+ TString yaml = R"(
+mount_config: {}
+schema: !yt/attrnode
+- strict: true
+ unique_keys: false
+- - name: lat
+ required: false
+ type: double
+ type_v3:
+ type_name: optional
+ item: double
+ - name: lon
+ required: false
+ type: double
+ type_v3:
+ type_name: optional
+ item: double
+native_cell_tag: !yt/uint64 9991
+creation_time: 2024-08-15T11:17:59.314773Z
+inherit_acl: true
+revision: !yt/uint64 8233452423020
+resource_usage:
+ node_count: 1
+ chunk_count: 1
+ disk_space_per_medium:
+ default: 562182
+ disk_space: 562182
+ chunk_host_cell_master_memory: 0
+ master_memory: 0
+ tablet_count: 0
+ tablet_static_memory: 0
+acl: []
+id: 77d-1c53a-27070191-e4d8f5ac
+parent_id: 77d-1c0d3-2707012f-ddf40dd7
+foreign: false
+type: table
+sequoia: false
+ref_counter: 1
+builtin: false
+owner: max
+compression_ratio: 0.3679379456925491
+)";
+ TString expectedYson = R"(
+{
+ "mount_config" = {};
+ "schema" = <
+ "strict" = %true;
+ "unique_keys" = %false;
+ > [
+ {
+ "name" = "lat";
+ "required" = %false;
+ "type" = "double";
+ "type_v3" = {
+ "type_name" = "optional";
+ "item" = "double";
+ };
+ };
+ {
+ "name" = "lon";
+ "required" = %false;
+ "type" = "double";
+ "type_v3" = {
+ "type_name" = "optional";
+ "item" = "double";
+ };
+ };
+ ];
+ "native_cell_tag" = 9991u;
+ "creation_time" = "2024-08-15T11:17:59.314773Z";
+ "inherit_acl" = %true;
+ "revision" = 8233452423020u;
+ "resource_usage" = {
+ "node_count" = 1;
+ "chunk_count" = 1;
+ "disk_space_per_medium" = {
+ "default" = 562182;
+ };
+ "disk_space" = 562182;
+ "chunk_host_cell_master_memory" = 0;
+ "master_memory" = 0;
+ "tablet_count" = 0;
+ "tablet_static_memory" = 0;
+ };
+ "acl" = [];
+ "id" = "77d-1c53a-27070191-e4d8f5ac";
+ "parent_id" = "77d-1c0d3-2707012f-ddf40dd7";
+ "foreign" = %false;
+ "type" = "table";
+ "sequoia" = %false;
+ "ref_counter" = 1;
+ "builtin" = %false;
+ "owner" = "max";
+ "compression_ratio" = 0.3679379456925491;
+})";
+ EXPECT_EQ(ParseYaml(yaml, EYsonType::Node), expectedYson.substr(1));
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yaml_writer_ut.cpp b/yt/yt/library/formats/unittests/yaml_writer_ut.cpp
new file mode 100644
index 0000000000..96fd4a4003
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yaml_writer_ut.cpp
@@ -0,0 +1,319 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/library/formats/yaml_writer.h>
+
+#include <yt/yt/client/formats/config.h>
+
+#include <yt/yt/core/yson/string.h>
+
+#include <yt/yt/core/ytree/convert.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+using namespace NYTree;
+
+//////////////////////////////////////////////////////////////////////////////
+
+TString YsonToYaml(const TYsonString& yson, const TYsonString& formatAttributes = TYsonString(TStringBuf("{}")))
+{
+ TStringStream outputStream;
+ auto config = ConvertTo<TYamlFormatConfigPtr>(formatAttributes);
+ auto writer = CreateYamlWriter(&outputStream, yson.GetType(), config);
+ Serialize(yson, writer.get());
+ writer->Flush();
+ return outputStream.Str();
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamlWriterTest, Simple)
+{
+ TString yson = "hello";
+ // Here and in the rest of the tests we introduce an extra leading \n for better readability, which we later
+ // strip off in the comparison.
+ TString expectedYaml = R"(
+hello
+)";
+
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, IntegersWithoutUintTag)
+{
+ TString yson = "{a=1; b=1u; c=-1; d=9223372036854775808u; e=-9223372036854775808; f=18446744073709551615u}";
+ TString expectedYaml = R"(
+a: 1
+b: 1
+c: -1
+d: 9223372036854775808
+e: -9223372036854775808
+f: 18446744073709551615
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, IntegersWithUintTag)
+{
+ TString formatAttributes = "{write_uint_tag=%true}";
+ TString yson = "{a=1; b=1u; c=-1; d=9223372036854775808u; e=-9223372036854775808; f=18446744073709551615u}";
+ TString expectedYaml = R"(
+a: 1
+b: !yt/uint64 1
+c: -1
+d: !yt/uint64 9223372036854775808
+e: -9223372036854775808
+f: !yt/uint64 18446744073709551615
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson), TYsonString(formatAttributes)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Doubles)
+{
+ TString yson = "{a=2.7; b=-3.14; c=0.0; d=4.; e=1e30; f=%nan; g=%inf; h=%-inf}";
+ TString expectedYaml = R"(
+a: 2.7
+b: -3.14
+c: 0.
+d: 4.
+e: 1e+30
+f: .nan
+g: .inf
+h: -.inf
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Entity)
+{
+ TString yson = "{a=#}";
+ TString expectedYaml = R"(
+a: null
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Booleans)
+{
+ TString yson = "{a=%true; b=%false}";
+ TString expectedYaml = R"(
+a: true
+b: false
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Strings)
+{
+ // a and b may be represented as plain scalars.
+ // c-e must be quoted on syntactical level, so libyaml chooses a single-quoted style.
+ // f-i must be quoted because they meet regexps for non-string types, so we force a double-quoted style.
+ TString yson = R"({a=hello; b="23asd"; c=" "; d="foo\nbar"; e=""; f="42"; g="TRUE"; h="1e4000"; i="~";})";
+ TString expectedYaml = R"(
+a: hello
+b: 23asd
+c: ' '
+d: 'foo
+
+ bar'
+e: ""
+f: "42"
+g: "TRUE"
+h: "1e4000"
+i: "~"
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Mappings)
+{
+ TString yson("{a={x=1;y={foo=bar;bar=foo}};b={z=3};c={};}");
+ TString expectedYaml = R"(
+a:
+ x: 1
+ y:
+ foo: bar
+ bar: foo
+b:
+ z: 3
+c: {}
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Sequences)
+{
+ TString yson = "[foo; [1; 2; 3]; bar; []; [[[#]]]]";
+ TString expectedYaml = R"(
+- foo
+- - 1
+ - 2
+ - 3
+- bar
+- []
+- - - - null
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, MultiDocument)
+{
+ TString yson = "foo;{a=1;b=2};[x;y];{};#;bar;[]";
+ TString expectedYaml = R"(
+foo
+---
+a: 1
+b: 2
+---
+- x
+- y
+--- {}
+--- null
+--- bar
+--- []
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson, EYsonType::ListFragment)), expectedYaml.substr(1));
+}
+
+TEST(TYamlWriterTest, Attributes)
+{
+ TString yson = "<x=1;y=2>{a=<>42; b=<x=#>[1;2;3]; c=<foo=1>#;}"; // Attributes at the top level and on nested values.
+ TString expectedYaml = R"(
+!yt/attrnode
+- x: 1
+ y: 2
+- a: !yt/attrnode
+ - {}
+ - 42
+ b: !yt/attrnode
+ - x: null
+ - - 1
+ - 2
+ - 3
+ c: !yt/attrnode
+ - foo: 1
+ - null
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson)), expectedYaml.substr(1));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamlWriterTest, EmptyStream)
+{
+ TString yson = "";
+ TString expectedYaml = "";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson, EYsonType::ListFragment)), expectedYaml);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+//! There is a reverse test in yaml_parser_ut.cpp.
+TEST(TYamlWriterTest, RealExample)
+{
+ TString formatAttributes = "{write_uint_tag=%true}";
+ TString yson = R"(
+{
+ "mount_config" = {};
+ "schema" = <
+ "strict" = %true;
+ "unique_keys" = %false;
+ > [
+ {
+ "name" = "lat";
+ "required" = %false;
+ "type" = "double";
+ "type_v3" = {
+ "type_name" = "optional";
+ "item" = "double";
+ };
+ };
+ {
+ "name" = "lon";
+ "required" = %false;
+ "type" = "double";
+ "type_v3" = {
+ "type_name" = "optional";
+ "item" = "double";
+ };
+ };
+ ];
+ "native_cell_tag" = 9991u;
+ "creation_time" = "2024-08-15T11:17:59.314773Z";
+ "inherit_acl" = %true;
+ "revision" = 8233452423020u;
+ "resource_usage" = {
+ "node_count" = 1;
+ "chunk_count" = 1;
+ "disk_space_per_medium" = {
+ "default" = 562182;
+ };
+ "disk_space" = 562182;
+ "chunk_host_cell_master_memory" = 0;
+ "master_memory" = 0;
+ "tablet_count" = 0;
+ "tablet_static_memory" = 0;
+ };
+ "acl" = [];
+ "id" = "77d-1c53a-27070191-e4d8f5ac";
+ "parent_id" = "77d-1c0d3-2707012f-ddf40dd7";
+ "foreign" = %false;
+ "type" = "table";
+ "sequoia" = %false;
+ "ref_counter" = 1;
+ "builtin" = %false;
+ "owner" = "max";
+ "compression_ratio" = 0.3679379456925491;
+}
+ )";
+
+ TString expectedYaml = R"(
+mount_config: {}
+schema: !yt/attrnode
+- strict: true
+ unique_keys: false
+- - name: lat
+ required: false
+ type: double
+ type_v3:
+ type_name: optional
+ item: double
+ - name: lon
+ required: false
+ type: double
+ type_v3:
+ type_name: optional
+ item: double
+native_cell_tag: !yt/uint64 9991
+creation_time: 2024-08-15T11:17:59.314773Z
+inherit_acl: true
+revision: !yt/uint64 8233452423020
+resource_usage:
+ node_count: 1
+ chunk_count: 1
+ disk_space_per_medium:
+ default: 562182
+ disk_space: 562182
+ chunk_host_cell_master_memory: 0
+ master_memory: 0
+ tablet_count: 0
+ tablet_static_memory: 0
+acl: []
+id: 77d-1c53a-27070191-e4d8f5ac
+parent_id: 77d-1c0d3-2707012f-ddf40dd7
+foreign: false
+type: table
+sequoia: false
+ref_counter: 1
+builtin: false
+owner: max
+compression_ratio: 0.3679379456925491
+)";
+ EXPECT_EQ(YsonToYaml(TYsonString(yson), TYsonString(formatAttributes)), expectedYaml.substr(1));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yamr_parser_ut.cpp b/yt/yt/library/formats/unittests/yamr_parser_ut.cpp
new file mode 100644
index 0000000000..84c9a28457
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yamr_parser_ut.cpp
@@ -0,0 +1,601 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/test_framework/yson_consumer_mock.h>
+
+#include <yt/yt/library/formats/yamr_parser.h>
+
+#include <yt/yt/core/yson/null_consumer.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+
+using ::testing::InSequence;
+using ::testing::StrictMock;
+using ::testing::NiceMock;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamrParserTest, Simple)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginAttributes());
+ EXPECT_CALL(Mock, OnKeyedItem("table_index"));
+ EXPECT_CALL(Mock, OnInt64Scalar(2));
+ EXPECT_CALL(Mock, OnEndAttributes());
+ EXPECT_CALL(Mock, OnEntity());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "key1\tvalue1\n"
+ "2\n"
+ "key2\tvalue2\n";
+
+ ParseYamr(input, &Mock);
+}
+
+TEST(TYamrParserTest, ValueWithTabs)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar(TStringBuf("key1\0", 5)));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value with \t and some other"));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar(TStringBuf("another\0 value with \t", 21)));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input(
+ "key1\0\tvalue with \t and some other\n"
+ "key2\tanother\0 value with \t\n",
+ 34 +
+ 27);
+
+ ParseYamr(input, &Mock);
+}
+
+TEST(TYamrParserTest, SimpleWithSubkey)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "key1\tsubkey1\tvalue1\n"
+ "key2\tsubkey2\tvalue2\n";
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrParserTest, IncompleteRows)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input =
+ "key1\tsubkey1\tvalue1\n"
+ "key\tsubkey\n"
+ "key2\tsubkey2\tvalue2\n";
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrParserTest, IncorrectIncompleteRows)
+{
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = false;
+
+ EXPECT_THROW(ParseYamr("\n", GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr("key\n", GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr("key\tvalue\nkey\n", GetNullYsonConsumer(), config), std::exception);
+}
+
+TEST(TYamrParserTest, TabsInValue)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("a\tb\\tc\t"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ auto config = New<TYamrFormatConfig>();
+ TString input = "key\ta\tb\\tc\t";
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrParserTest, Escaping)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("\tkey\t"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("\n"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("a\tb\t\n"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+ config->EnableEscaping = true;
+
+ TString input = "\\tkey\\t\t\\n\ta\tb\t\\n\n";
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrParserTest, CustomSeparators)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value"));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ auto config = New<TYamrFormatConfig>();
+ config->RecordSeparator = 'Y';
+ config->FieldSeparator = 'X';
+
+ TString input = "keyXvalueYkey2Xvalue2Y";
+ ParseYamr(input, &Mock, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamrLenvalParserTest, Simple)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginAttributes());
+ EXPECT_CALL(Mock, OnKeyedItem("table_index"));
+ EXPECT_CALL(Mock, OnInt64Scalar(1));
+ EXPECT_CALL(Mock, OnEndAttributes());
+ EXPECT_CALL(Mock, OnEntity());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (2 * 4 + 4 + 6) + 8); // all i32 + lengths of keys
+
+ auto config = New<TYamrFormatConfig>();
+ config->Lenval = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrLenvalParserTest, SimpleWithSubkey)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("subkey2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x07\x00\x00\x00" "subkey1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x07\x00\x00\x00" "subkey2"
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (3 * 4 + 4 + 7 + 6)); // all i32 + lengths of keys
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+ config->Lenval = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrLenvalParserTest, EmptyFields)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = TString(
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00",
+ 3 * 4);
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+ config->Lenval = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrLenvalParserTest, HugeLength)
+{
+ TString input = TString(
+ "\xFF\xFF\xFF\xFF"
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00",
+ 3 * 4);
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+ config->Lenval = true;
+
+ EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
+}
+
+TEST(TYamrLenvalParserTest, SimpleEndOfMessage)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key1"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value1"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginAttributes());
+ EXPECT_CALL(Mock, OnKeyedItem("table_index"));
+ EXPECT_CALL(Mock, OnInt64Scalar(1));
+ EXPECT_CALL(Mock, OnEndAttributes());
+ EXPECT_CALL(Mock, OnEntity());
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("key2"));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar("value2"));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ auto config = New<TYamrFormatConfig>();
+ config->Lenval = true;
+ config->EnableEom = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrLenvalParserTest, EmptyFieldsWithEOM)
+{
+ StrictMock<TMockYsonConsumer> Mock;
+ InSequence dummy;
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnKeyedItem("value"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnEndMap());
+
+ TString input = TString(
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00"
+ "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00",
+ 3 * 4 + 12);
+
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = true;
+ config->Lenval = true;
+ config->EnableEom = true;
+
+ ParseYamr(input, &Mock, config);
+}
+
+TEST(TYamrParserTest, IncorrectPlaceOfEOM)
+{
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = false;
+ config->Lenval = true;
+ config->EnableEom = true;
+
+ TString input1 = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ TString input2 = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+
+ "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
+
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ EXPECT_THROW(ParseYamr(input1, GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr(input2, GetNullYsonConsumer(), config), std::exception);
+}
+
+TEST(TYamrParserTest, IncorrectEOM)
+{
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = false;
+ config->Lenval = true;
+ config->EnableEom = true;
+
+ // Garbage after EOM marker
+ TString input1 = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ // Row count mismatch
+ TString input2 = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\xfb\xff\xff\xff" "\x03\x00\x00\x00\x00\x00\x00\x00",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ // Missing EOM marker
+ TString input3 = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2",
+
+ 2 * (2 * 4 + 4 + 6) + 8); // all i32 + lengths of keys
+
+ // Missing EOM marker with empty fields
+ TString input4 = TString(
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00"
+ "\x00\x00\x00\x00",
+ 3 * 4);
+
+ EXPECT_THROW(ParseYamr(input1, GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr(input2, GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr(input3, GetNullYsonConsumer(), config), std::exception);
+ EXPECT_THROW(ParseYamr(input4, GetNullYsonConsumer(), config), std::exception);
+}
+
+TEST(TYamrParserTest, UnsupportedEOMInTextMode)
+{
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = false;
+ config->Lenval = false;
+ config->EnableEom = true;
+
+ TString input = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
+}
+
+TEST(TYamrParserTest, UnexpectedEOM)
+{
+ auto config = New<TYamrFormatConfig>();
+ config->HasSubkey = false;
+ config->Lenval = true;
+ config->EnableEom = false;
+
+ TString input = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xff\xff\xff\xff" "\x01\x00\x00\x00"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00",
+
+ 2 * (2 * 4 + 4 + 6) + 8 + 12); // all i32 + lengths of keys
+
+ EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yamr_writer_ut.cpp b/yt/yt/library/formats/unittests/yamr_writer_ut.cpp
new file mode 100644
index 0000000000..2cad4bcbc9
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yamr_writer_ut.cpp
@@ -0,0 +1,645 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/client/table_client/unversioned_row.h>
+#include <yt/yt/client/table_client/name_table.h>
+
+#include <yt/yt/library/formats/yamr_writer.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+
+namespace NYT::NFormats {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYTree;
+using namespace NYson;
+using namespace NConcurrency;
+using namespace NTableClient;
+
+class TSchemalessWriterForYamrTest // Fixture for the YAMR writer tests: a name table, a format config and an in-memory output stream.
+ : public ::testing::Test
+{
+protected:
+ TNameTablePtr NameTable_;
+ int KeyId_; // The *Id_ members hold the values returned by NameTable_->RegisterName() in the constructor.
+ int SubkeyId_;
+ int ValueId_;
+ int TableIndexId_;
+ int RangeIndexId_;
+ int RowIndexId_;
+
+ TYamrFormatConfigPtr Config_; // Tests adjust this before creating the writer.
+
+ IUnversionedRowsetWriterPtr Writer_; // Left unset until a test creates a writer.
+
+ TStringStream OutputStream_; // Sink the writer is attached to; tests compare OutputStream_.Str() with the expected bytes.
+
+ TSchemalessWriterForYamrTest()
+ {
+ NameTable_ = New<TNameTable>();
+ KeyId_ = NameTable_->RegisterName("key");
+ SubkeyId_ = NameTable_->RegisterName("subkey");
+ ValueId_ = NameTable_->RegisterName("value");
+ TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
+ RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName); // NOTE(review): row index is registered before range index, unlike the member declaration order.
+ RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
+
+ Config_ = New<TYamrFormatConfig>();
+ }
+ // Creates Writer_ over OutputStream_ with the current Config_ and NameTable_, context saving off and zero key columns.
+ void CreateStandardWriter(TControlAttributesConfigPtr controlAttributes = New<TControlAttributesConfig>())
+ {
+ Writer_ = CreateSchemalessWriterForYamr(
+ Config_,
+ NameTable_,
+ CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
+ false, /* enableContextSaving */
+ controlAttributes,
+ 0 /* keyColumnCount */);
+ }
+};
+
+TEST_F(TSchemalessWriterForYamrTest, Simple)
+{
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder firstBuilder;
+    firstBuilder.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+    firstBuilder.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+
+    // System columns are not expected to show up in the output.
+    firstBuilder.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
+
+    // The second row deliberately supplies the value before the key.
+    TUnversionedRowBuilder secondBuilder;
+    secondBuilder.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+    secondBuilder.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+
+    std::vector<TUnversionedRow> batch{firstBuilder.GetRow(), secondBuilder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected =
+        "key1\tvalue1\n"
+        "key2\tvalue2\n";
+
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SimpleWithSubkey)
+{
+    Config_->HasSubkey = true;
+    CreateStandardWriter();
+    // Both rows list their columns in a different order than the expected key/subkey/value output.
+    TUnversionedRowBuilder firstBuilder;
+    firstBuilder.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+    firstBuilder.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+    firstBuilder.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
+
+    TUnversionedRowBuilder secondBuilder;
+    secondBuilder.AddValue(MakeUnversionedStringValue("subkey2", SubkeyId_));
+    secondBuilder.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+    secondBuilder.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+
+    std::vector<TUnversionedRow> batch{firstBuilder.GetRow(), secondBuilder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected =
+        "key1\tsubkey1\tvalue1\n"
+        "key2\tsubkey2\tvalue2\n";
+
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SubkeyCouldBeSkipped)
+{
+    Config_->HasSubkey = true;
+    CreateStandardWriter();
+    // No subkey column is supplied at all; an empty subkey field is expected in the output.
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("key", KeyId_));
+    builder.AddValue(MakeUnversionedStringValue("value", ValueId_));
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected = "key\t\tvalue\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SubkeyCouldBeNull)
+{
+    Config_->HasSubkey = true;
+    CreateStandardWriter();
+    // The subkey is present but Null; an empty subkey field is expected in the output.
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("key", KeyId_));
+    builder.AddValue(MakeUnversionedSentinelValue(EValueType::Null, SubkeyId_));
+    builder.AddValue(MakeUnversionedStringValue("value", ValueId_));
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected = "key\t\tvalue\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, NonNullTerminatedStrings)
+{
+    Config_->HasSubkey = true;
+    CreateStandardWriter();
+    // Every value is a slice of one buffer, so none of them is followed by a NUL terminator.
+    TUnversionedRowBuilder builder;
+    const char* buffer = "trashkeytrashsubkeytrashvalue";
+    builder.AddValue(MakeUnversionedStringValue(TStringBuf(buffer + 5, 3), KeyId_));
+    builder.AddValue(MakeUnversionedStringValue(TStringBuf(buffer + 13, 6), SubkeyId_));
+    builder.AddValue(MakeUnversionedStringValue(TStringBuf(buffer + 24, 5), ValueId_));
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected = "key\tsubkey\tvalue\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SkippedKey)
+{
+    CreateStandardWriter();
+
+    // The row carries only the value column; the key is absent.
+    TUnversionedRowBuilder keylessBuilder;
+    keylessBuilder.AddValue(MakeUnversionedStringValue("value", ValueId_));
+
+    std::vector<TUnversionedRow> batch{keylessBuilder.GetRow()};
+    EXPECT_FALSE(Writer_->Write(batch));
+
+    EXPECT_THROW(
+        Writer_->Close().Get().ThrowOnError(),
+        std::exception);
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SkippedValue)
+{
+    CreateStandardWriter();
+
+    // The row carries only the key column; the value is absent.
+    TUnversionedRowBuilder valuelessBuilder;
+    valuelessBuilder.AddValue(MakeUnversionedStringValue("key", KeyId_));
+
+    std::vector<TUnversionedRow> batch{valuelessBuilder.GetRow()};
+    EXPECT_FALSE(Writer_->Write(batch));
+
+    EXPECT_THROW(
+        Writer_->Close().Get().ThrowOnError(),
+        std::exception);
+}
+
+TEST_F(TSchemalessWriterForYamrTest, NotStringType)
+{
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("key", KeyId_));
+    builder.AddValue(MakeUnversionedInt64Value(42, ValueId_)); // Int64 instead of a string value.
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+    EXPECT_FALSE(Writer_->Write(batch));
+
+    EXPECT_THROW(
+        Writer_->Close().Get().ThrowOnError(),
+        std::exception);
+}
+
+TEST_F(TSchemalessWriterForYamrTest, ExtraItem)
+{
+    int extraColumnId = NameTable_->RegisterName("trash");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("key", KeyId_));
+    builder.AddValue(MakeUnversionedStringValue("value", ValueId_));
+    // A column unknown to the format: expected to be dropped.
+    builder.AddValue(MakeUnversionedStringValue("trash", extraColumnId));
+    // With Config_->HasSubkey left off the subkey is expected to be dropped as well,
+    // even though its type is not string.
+    builder.AddValue(MakeUnversionedInt64Value(42, SubkeyId_));
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected = "key\tvalue\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, Escaping)
+{
+    Config_->HasSubkey = true;
+    Config_->EnableEscaping = true;
+    CreateStandardWriter();
+    // Each field consists solely of a character that doubles as a separator.
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("\n", KeyId_));
+    builder.AddValue(MakeUnversionedStringValue("\t", SubkeyId_));
+    builder.AddValue(MakeUnversionedStringValue("\n", ValueId_));
+
+    std::vector<TUnversionedRow> batch{builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(batch));
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+
+    const TString expected = "\\n\t\\t\t\\n\n";
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SimpleWithTableIndex)
+{
+    Config_->EnableTableIndex = true;
+
+    auto attributes = New<TControlAttributesConfig>();
+    attributes->EnableTableIndex = true;
+    CreateStandardWriter(attributes);
+
+    TUnversionedRowBuilder firstBuilder;
+    firstBuilder.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+    firstBuilder.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+
+    TUnversionedRowBuilder secondBuilder;
+    secondBuilder.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+    secondBuilder.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+    secondBuilder.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+
+    std::vector<TUnversionedRow> batch{firstBuilder.GetRow(), secondBuilder.GetRow()};
+    EXPECT_TRUE(Writer_->Write(batch));
+
+    TUnversionedRowBuilder thirdBuilder;
+    thirdBuilder.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+    thirdBuilder.AddValue(MakeUnversionedStringValue("value3", ValueId_));
+    thirdBuilder.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
+
+    batch = {thirdBuilder.GetRow()};
+    EXPECT_TRUE(Writer_->Write(batch));
+
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+    // A table index line is expected only where the index differs from the previous row's.
+    const TString expected =
+        "42\n"
+        "key1\tvalue1\n"
+        "key2\tvalue2\n"
+        "23\n"
+        "key3\tvalue3\n";
+
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, SimpleWithRowIndexAndTableIndex)
+{
+    Config_->EnableTableIndex = true;
+
+    auto attributes = New<TControlAttributesConfig>();
+    attributes->EnableTableIndex = true;
+    attributes->EnableRowIndex = true;
+    CreateStandardWriter(attributes);
+
+    TUnversionedRowBuilder firstBuilder;
+    firstBuilder.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+    firstBuilder.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(0, RowIndexId_));
+    firstBuilder.AddValue(MakeUnversionedInt64Value(0, RangeIndexId_));
+    TUnversionedRowBuilder secondBuilder; // Carries no system columns at all.
+    secondBuilder.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+    secondBuilder.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+    std::vector<TUnversionedRow> batch{firstBuilder.GetRow(), secondBuilder.GetRow()};
+    EXPECT_TRUE(Writer_->Write(batch));
+
+    TUnversionedRowBuilder thirdBuilder; // Row and range index only, no table index.
+    thirdBuilder.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+    thirdBuilder.AddValue(MakeUnversionedStringValue("value3", ValueId_));
+    thirdBuilder.AddValue(MakeUnversionedInt64Value(5, RowIndexId_));
+    thirdBuilder.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
+    batch = {thirdBuilder.GetRow()};
+    EXPECT_TRUE(Writer_->Write(batch));
+
+    TUnversionedRowBuilder fourthBuilder;
+    fourthBuilder.AddValue(MakeUnversionedStringValue("key4", KeyId_));
+    fourthBuilder.AddValue(MakeUnversionedStringValue("value4", ValueId_));
+    fourthBuilder.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
+    fourthBuilder.AddValue(MakeUnversionedInt64Value(10, RowIndexId_));
+    fourthBuilder.AddValue(MakeUnversionedInt64Value(2, RangeIndexId_));
+    batch = {fourthBuilder.GetRow()};
+    EXPECT_TRUE(Writer_->Write(batch));
+
+    Writer_->Close()
+        .Get()
+        .ThrowOnError();
+    // Expected: a "<table index>\n<row index>\n" header ahead of rows 1, 3 and 4; row 2 follows row 1 with no header.
+    const TString expected =
+        "42\n0\n"
+        "key1\tvalue1\n"
+        "key2\tvalue2\n"
+        "42\n5\n"
+        "key3\tvalue3\n"
+        "23\n10\n"
+        "key4\tvalue4\n";
+
+    EXPECT_EQ(expected, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, Lenval) // Lenval output with subkey: every field is written as a 4-byte length plus raw bytes.
+{
+ Config_->HasSubkey = true;
+ Config_->Lenval = true;
+ CreateStandardWriter();
+
+ // Note that order in both rows is unusual.
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+ row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+ row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
+
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+ row2.AddValue(MakeUnversionedStringValue("subkey2", SubkeyId_));
+
+ std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
+
+ EXPECT_EQ(true, Writer_->Write(rows));
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+ // Expected bytes: key, subkey, value per record, each preceded by a 4-byte little-endian length.
+ TString output = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x07\x00\x00\x00" "subkey1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x07\x00\x00\x00" "subkey2"
+ "\x06\x00\x00\x00" "value2",
+ // Explicit size is required because the literal contains embedded NUL bytes.
+ 2 * (3 * 4 + 4 + 6 + 7)); // 2 records x (3 length prefixes + key + value + subkey bytes)
+ EXPECT_EQ(output, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, LenvalWithEmptyFields) // An empty key, subkey or value is written as a zero length with no payload.
+{
+ Config_->HasSubkey = true;
+ Config_->Lenval = true;
+ CreateStandardWriter();
+ // Row 1 has an empty key, row 2 an empty subkey, row 3 an empty value.
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("", KeyId_));
+ row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
+ row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row2.AddValue(MakeUnversionedStringValue("", SubkeyId_));
+ row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+
+ TUnversionedRowBuilder row3;
+ row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+ row3.AddValue(MakeUnversionedStringValue("subkey3", SubkeyId_));
+ row3.AddValue(MakeUnversionedStringValue("", ValueId_));
+
+ std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow(), row3.GetRow() };
+
+ EXPECT_EQ(true, Writer_->Write(rows));
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+ // Expected bytes: 4-byte little-endian length before every field, including the empty ones.
+ TString output = TString(
+ "\x00\x00\x00\x00" ""
+ "\x07\x00\x00\x00" "subkey1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x00\x00\x00\x00" ""
+ "\x06\x00\x00\x00" "value2"
+
+ "\x04\x00\x00\x00" "key3"
+ "\x07\x00\x00\x00" "subkey3"
+ "\x00\x00\x00\x00" "",
+ // Explicit size is required because the literal contains embedded NUL bytes.
+ 9 * 4 + (7 + 6) + (4 + 6) + (4 + 7)); // 9 length prefixes + non-empty field bytes of each record
+
+ EXPECT_EQ(output, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, LenvalWithKeySwitch) // With EnableKeySwitch a 0xfffffffe marker is expected wherever the key changes.
+{
+ Config_->HasSubkey = true;
+ Config_->Lenval = true;
+
+ auto controlAttributes = New<TControlAttributesConfig>();
+ controlAttributes->EnableKeySwitch = true;
+ // Not CreateStandardWriter(): this test needs keyColumnCount = 1 instead of 0.
+ Writer_ = CreateSchemalessWriterForYamr(
+ Config_,
+ NameTable_,
+ CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
+ false, /* enableContextSaving */
+ controlAttributes,
+ 1 /* keyColumnCount */);
+
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+ row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
+ row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row2.AddValue(MakeUnversionedStringValue("subkey21", SubkeyId_));
+ row2.AddValue(MakeUnversionedStringValue("value21", ValueId_));
+ // Same key as row2: no marker is expected between row2 and row3.
+ TUnversionedRowBuilder row3;
+ row3.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row3.AddValue(MakeUnversionedStringValue("subkey22", SubkeyId_));
+ row3.AddValue(MakeUnversionedStringValue("value22", ValueId_));
+
+ std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow(), row3.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+ // row4 goes in a separate Write() call; a marker is still expected before it (key2 -> key3).
+ TUnversionedRowBuilder row4;
+ row4.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+ row4.AddValue(MakeUnversionedStringValue("subkey3", SubkeyId_));
+ row4.AddValue(MakeUnversionedStringValue("value3", ValueId_));
+
+ rows = { row4.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+
+ TString output = TString(
+ "\x04\x00\x00\x00" "key1"
+ "\x07\x00\x00\x00" "subkey1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\xfe\xff\xff\xff" // key switch
+
+ "\x04\x00\x00\x00" "key2"
+ "\x08\x00\x00\x00" "subkey21"
+ "\x07\x00\x00\x00" "value21"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x08\x00\x00\x00" "subkey22"
+ "\x07\x00\x00\x00" "value22"
+
+ "\xfe\xff\xff\xff" // key switch
+
+ "\x04\x00\x00\x00" "key3"
+ "\x07\x00\x00\x00" "subkey3"
+ "\x06\x00\x00\x00" "value3",
+ // Explicit size is required because the literal contains embedded NUL bytes.
+ 14 * 4 + (4 + 7 + 6) + (4 + 8 + 7) + (4 + 8 + 7) + (4 + 7 + 6)); // 12 length prefixes + 2 key-switch markers + field bytes of the 4 records
+
+ EXPECT_EQ(output, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, LenvalWithTableIndex) // In lenval mode a table index is expected as a 0xffffffff marker plus a 4-byte value.
+{
+ Config_->EnableTableIndex = true;
+ Config_->Lenval = true;
+
+ auto controlAttributes = New<TControlAttributesConfig>();
+ controlAttributes->EnableTableIndex = true;
+ CreateStandardWriter(controlAttributes);
+
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+ row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+ row1.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+ // Same table index as row1: no second marker is expected before row2.
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+ row2.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+
+ std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+ // Table index changes to 23: a new marker is expected before row3.
+ TUnversionedRowBuilder row3;
+ row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+ row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
+ row3.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
+
+ rows = { row3.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+
+ TString output(
+ "\xff\xff\xff\xff" "\x2a\x00\x00\x00" // 42
+
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\xff\xff\xff\xff" "\x17\x00\x00\x00" // 23
+
+ "\x04\x00\x00\x00" "key3"
+ "\x06\x00\x00\x00" "value3",
+ // Explicit size is required because the literal contains embedded NUL bytes.
+ 10 * 4 + 3 * (4 + 6)); // 2 x (marker + index) + 6 length prefixes, then key + value bytes of 3 records
+
+ EXPECT_EQ(output, OutputStream_.Str());
+}
+
+TEST_F(TSchemalessWriterForYamrTest, LenvalWithRangeAndRowIndex) // Range and row index markers are expected once, ahead of the first row only.
+{
+ Config_->Lenval = true;
+
+ auto controlAttributes = New<TControlAttributesConfig>();
+ controlAttributes->EnableRowIndex = true;
+ controlAttributes->EnableRangeIndex = true;
+ CreateStandardWriter(controlAttributes);
+ // All three rows share range index 42 and have consecutive row indexes 23, 24, 25.
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
+ row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
+ row1.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
+ row1.AddValue(MakeUnversionedInt64Value(23, RowIndexId_));
+
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
+ row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
+ row2.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
+ row2.AddValue(MakeUnversionedInt64Value(24, RowIndexId_));
+
+ std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+
+ TUnversionedRowBuilder row3;
+ row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
+ row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
+ row3.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
+ row3.AddValue(MakeUnversionedInt64Value(25, RowIndexId_));
+
+ rows = { row3.GetRow() };
+ EXPECT_EQ(true, Writer_->Write(rows));
+
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+ // 0xfffffffd + 4-byte range index, then 0xfffffffc + 8-byte row index; no further markers follow.
+ TString output(
+ "\xfd\xff\xff\xff" "\x2a\x00\x00\x00" // 42
+ "\xfc\xff\xff\xff" "\x17\x00\x00\x00\x00\x00\x00\x00" // 23
+
+ "\x04\x00\x00\x00" "key1"
+ "\x06\x00\x00\x00" "value1"
+
+ "\x04\x00\x00\x00" "key2"
+ "\x06\x00\x00\x00" "value2"
+
+ "\x04\x00\x00\x00" "key3"
+ "\x06\x00\x00\x00" "value3",
+ // Explicit size is required because the literal contains embedded NUL bytes.
+ 11 * 4 + 3 * (4 + 6)); // 8-byte range entry + 12-byte row entry + 6 length prefixes, then key + value bytes of 3 records
+
+ EXPECT_EQ(output, OutputStream_.Str());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yamred_dsv_parser_ut.cpp b/yt/yt/library/formats/unittests/yamred_dsv_parser_ut.cpp
new file mode 100644
index 0000000000..41183ca5f0
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yamred_dsv_parser_ut.cpp
@@ -0,0 +1,185 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/core/test_framework/yson_consumer_mock.h>
+
+#include <yt/yt/library/formats/yamred_dsv_parser.h>
+
+namespace NYT::NFormats {
+namespace {
+
+using namespace NYson;
+
+using ::testing::InSequence;
+using ::testing::StrictMock;
+using ::testing::NiceMock;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamredDsvParserTest, Simple) // Two text rows with a two-column key, a one-column subkey and a DSV value.
+{
+ StrictMock<TMockYsonConsumer> Mock; // StrictMock: any call without a matching expectation fails the test.
+ InSequence dummy; // The expectations below must be satisfied in exactly this order.
+ // First row: key "1 2" -> key_a, key_b; subkey "3" -> subkey_x; value "a=5\tb=6".
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key_a"));
+ EXPECT_CALL(Mock, OnStringScalar("1"));
+ EXPECT_CALL(Mock, OnKeyedItem("key_b"));
+ EXPECT_CALL(Mock, OnStringScalar("2"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey_x"));
+ EXPECT_CALL(Mock, OnStringScalar("3"));
+ EXPECT_CALL(Mock, OnKeyedItem("a"));
+ EXPECT_CALL(Mock, OnStringScalar("5"));
+ EXPECT_CALL(Mock, OnKeyedItem("b"));
+ EXPECT_CALL(Mock, OnStringScalar("6"));
+ EXPECT_CALL(Mock, OnEndMap());
+ EXPECT_CALL(Mock, OnListItem()); // Second row starts here.
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key_a"));
+ EXPECT_CALL(Mock, OnStringScalar("7"));
+ EXPECT_CALL(Mock, OnKeyedItem("key_b"));
+ EXPECT_CALL(Mock, OnStringScalar("8"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey_x"));
+ EXPECT_CALL(Mock, OnStringScalar("9"));
+ EXPECT_CALL(Mock, OnKeyedItem("b"));
+ EXPECT_CALL(Mock, OnStringScalar("max\tignat")); // The escaped "\\t" of the input is expected as a real tab.
+ EXPECT_CALL(Mock, OnKeyedItem("a"));
+ EXPECT_CALL(Mock, OnStringScalar("100"));
+ EXPECT_CALL(Mock, OnEndMap());
+ // Key columns are space-separated inside the key field; fields are tab-separated.
+ TString input =
+ "1 2\t3\ta=5\tb=6\n"
+ "7 8\t9\tb=max\\tignat\ta=100\n";
+
+ auto config = New<TYamredDsvFormatConfig>();
+ config->HasSubkey = true;
+ config->KeyColumnNames.push_back("key_a");
+ config->KeyColumnNames.push_back("key_b");
+ config->SubkeyColumnNames.push_back("subkey_x");
+
+ ParseYamredDsv(input, &Mock, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamredDsvParserTest, EmptyField) // An empty key field must be reported as an empty string for the single key column.
+{
+ StrictMock<TMockYsonConsumer> Mock; // StrictMock: any call without a matching expectation fails the test.
+ InSequence dummy; // The expectations below must be satisfied in exactly this order.
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar(""));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("0 1")); // With one subkey column the space stays part of the value.
+ EXPECT_CALL(Mock, OnKeyedItem("a"));
+ EXPECT_CALL(Mock, OnStringScalar("b"));
+ EXPECT_CALL(Mock, OnEndMap());
+ // The input line starts with a tab, i.e. the key field is empty.
+ TString input = "\t0 1\ta=b\n";
+
+ auto config = New<TYamredDsvFormatConfig>();
+ config->HasSubkey = true;
+ config->KeyColumnNames.push_back("key");
+ config->SubkeyColumnNames.push_back("subkey");
+
+ ParseYamredDsv(input, &Mock, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamredDsvParserTest, Escaping) // With EnableEscaping, "\\t" and "\\n" sequences must be unescaped in key, subkey and value.
+{
+ StrictMock<TMockYsonConsumer> Mock; // StrictMock: any call without a matching expectation fails the test.
+ InSequence dummy; // The expectations below must be satisfied in exactly this order.
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("\t"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("0\n1"));
+ EXPECT_CALL(Mock, OnKeyedItem("a"));
+ EXPECT_CALL(Mock, OnStringScalar("\tb\nc"));
+ EXPECT_CALL(Mock, OnEndMap());
+ // Doubled backslashes are literal backslashes in the input; single "\t" / "\n" are the real separators.
+ TString input = "\\t\t0\\n1\ta=\\tb\\nc\n";
+
+ auto config = New<TYamredDsvFormatConfig>();
+ config->HasSubkey = true;
+ config->EnableEscaping = true;
+ config->KeyColumnNames.push_back("key");
+ config->SubkeyColumnNames.push_back("subkey");
+
+ ParseYamredDsv(input, &Mock, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TYamredDsvParserTest, Lenval) // Lenval input: key, subkey and the DSV value each come with a 4-byte length prefix.
+{
+ StrictMock<TMockYsonConsumer> Mock; // StrictMock: any call without a matching expectation fails the test.
+ InSequence dummy; // The expectations below must be satisfied in exactly this order.
+
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("a"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("bc"));
+ EXPECT_CALL(Mock, OnKeyedItem("d"));
+ EXPECT_CALL(Mock, OnStringScalar("e"));
+ EXPECT_CALL(Mock, OnEndMap());
+ // Lengths are little-endian; the explicit size is required because of the embedded NUL bytes.
+ TString input = TString(
+ "\x01\x00\x00\x00" "a"
+ "\x02\x00\x00\x00" "bc"
+ "\x03\x00\x00\x00" "d=e",
+ 3 * 4 + 1 + 2 + 3); // 3 length prefixes + field bytes
+
+ auto config = New<TYamredDsvFormatConfig>();
+ config->Lenval = true;
+ config->HasSubkey = true;
+ config->KeyColumnNames.push_back("key");
+ config->SubkeyColumnNames.push_back("subkey");
+
+ ParseYamredDsv(input, &Mock, config);
+}
+
+TEST(TYamredDsvParserTest, EOM) // Same record as the Lenval test, followed by a trailing marker that is accepted because EnableEom is set.
+{
+ StrictMock<TMockYsonConsumer> Mock; // StrictMock: any call without a matching expectation fails the test.
+ InSequence dummy; // The expectations below must be satisfied in exactly this order.
+ // Only the single data row is expected; the trailing marker must not produce any consumer call.
+ EXPECT_CALL(Mock, OnListItem());
+ EXPECT_CALL(Mock, OnBeginMap());
+ EXPECT_CALL(Mock, OnKeyedItem("key"));
+ EXPECT_CALL(Mock, OnStringScalar("a"));
+ EXPECT_CALL(Mock, OnKeyedItem("subkey"));
+ EXPECT_CALL(Mock, OnStringScalar("bc"));
+ EXPECT_CALL(Mock, OnKeyedItem("d"));
+ EXPECT_CALL(Mock, OnStringScalar("e"));
+ EXPECT_CALL(Mock, OnEndMap());
+ // The last line is the 0xfffffffb marker plus an 8-byte value of 1 (presumably the row count -- TODO confirm).
+ TString input = TString(
+ "\x01\x00\x00\x00" "a"
+ "\x02\x00\x00\x00" "bc"
+ "\x03\x00\x00\x00" "d=e"
+ "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00",
+ 3 * 4 + 1 + 2 + 3 + 12); // 3 length prefixes + field bytes + 12-byte trailing marker
+
+ auto config = New<TYamredDsvFormatConfig>();
+ config->Lenval = true;
+ config->EnableEom = true;
+ config->HasSubkey = true;
+ config->KeyColumnNames.push_back("key");
+ config->SubkeyColumnNames.push_back("subkey");
+
+ ParseYamredDsv(input, &Mock, config);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yamred_dsv_writer_ut.cpp b/yt/yt/library/formats/unittests/yamred_dsv_writer_ut.cpp
new file mode 100644
index 0000000000..fc5f28639e
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yamred_dsv_writer_ut.cpp
@@ -0,0 +1,424 @@
+#include <yt/yt/core/test_framework/framework.h>
+
+#include <yt/yt/client/table_client/unversioned_row.h>
+#include <yt/yt/client/table_client/name_table.h>
+
+#include <yt/yt/library/formats/yamred_dsv_writer.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+
+#include <util/string/vector.h>
+
+#include <cstdio>
+
+
+namespace NYT::NFormats {
+namespace {
+
+using VectorStrok = TVector<TString>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+using namespace NYTree;
+using namespace NYson;
+using namespace NConcurrency;
+using namespace NTableClient;
+
+// Fixture for the schemaless YAMRed DSV writer tests: owns the name table, config, writer and output sink.
+class TSchemalessWriterForYamredDsvTest
+    : public ::testing::Test
+{
+protected:
+    TNameTablePtr NameTable_;
+    TYamredDsvFormatConfigPtr Config_;
+    IUnversionedRowsetWriterPtr Writer_;
+    TStringStream OutputStream_;
+
+    int KeyAId_;
+    int KeyBId_;
+    int KeyCId_;
+    int ValueXId_;
+    int ValueYId_;
+    int TableIndexId_;
+    int RangeIndexId_;
+    int RowIndexId_;
+
+    TSchemalessWriterForYamredDsvTest()
+    {
+        NameTable_ = New<TNameTable>();
+        KeyAId_ = NameTable_->RegisterName("key_a");
+        KeyBId_ = NameTable_->RegisterName("key_b");
+        KeyCId_ = NameTable_->RegisterName("key_c");
+        ValueXId_ = NameTable_->RegisterName("value_x");
+        ValueYId_ = NameTable_->RegisterName("value_y");
+        TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
+        RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName);
+        RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
+        Config_ = New<TYamredDsvFormatConfig>();
+    }
+
+    void CreateStandardWriter(TControlAttributesConfigPtr controlAttributes = New<TControlAttributesConfig>())
+    {
+        Writer_ = CreateSchemalessWriterForYamredDsv(
+            Config_,
+            NameTable_,
+            CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
+            /*enableContextSaving*/ false,
+            controlAttributes,
+            /*keyColumnCount*/ 0);
+    }
+
+    // Splits a row by the field separator into the key (first field) and the remaining fields, sorted.
+    // Returns true on success (at least 2 fields); otherwise returns false and value holds the raw split.
+    bool ExtractKeyValue(TString output, TString& key, VectorStrok& value, char fieldSeparator = '\t')
+    {
+        char delimiter[2] = {fieldSeparator, 0};
+        value = SplitString(output, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        // We should at least have key and the rest of values.
+        if (value.size() < 2) {
+            return false;
+        }
+        key = value[0];
+        value.erase(value.begin());
+        std::sort(value.begin(), value.end());
+        return true;
+    }
+
+    // Same as ExtractKeyValue, but the second field is the subkey; needs at least 3 fields to succeed.
+    bool ExtractKeySubkeyValue(TString output, TString& key, TString& subkey, VectorStrok& value, char fieldSeparator = '\t')
+    {
+        char delimiter[2] = {fieldSeparator, 0};
+        value = SplitString(output, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        if (value.size() < 3) {
+            return false;
+        }
+        key = value[0];
+        subkey = value[1];
+        // Drop only the key and the subkey; the remaining fields are the value entries to compare.
+        value.erase(value.begin(), value.begin() + 2);
+        std::sort(value.begin(), value.end());
+        return true;
+    }
+
+    // Compares output and expected output ignoring the order of entries in YAMR value column.
+    void CompareKeyValue(TString output, TString expected, char recordSeparator = '\n', char fieldSeparator = '\t')
+    {
+        char delimiter[2] = {recordSeparator, 0};
+        VectorStrok outputRows = SplitString(output, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        VectorStrok expectedRows = SplitString(expected, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        ASSERT_EQ(outputRows.size(), expectedRows.size()); // Fatal: rows are indexed pairwise below.
+        // Since there is \n after each row, there will be an extra empty string in both vectors.
+        EXPECT_EQ(outputRows.back(), "");
+        ASSERT_EQ(expectedRows.back(), "");
+        outputRows.pop_back();
+        expectedRows.pop_back();
+
+        TString outputKey;
+        TString expectedKey;
+        VectorStrok outputValue;
+        VectorStrok expectedValue;
+        for (size_t rowIndex = 0; rowIndex < outputRows.size(); ++rowIndex) {
+            EXPECT_TRUE(ExtractKeyValue(outputRows[rowIndex], outputKey, outputValue, fieldSeparator));
+            ASSERT_TRUE(ExtractKeyValue(expectedRows[rowIndex], expectedKey, expectedValue, fieldSeparator));
+            EXPECT_EQ(outputKey, expectedKey);
+            EXPECT_EQ(outputValue, expectedValue);
+        }
+    }
+
+    // Same as CompareKeyValue, but every row also carries a subkey that is compared separately.
+    void CompareKeySubkeyValue(TString output, TString expected, char recordSeparator = '\n', char fieldSeparator = '\t')
+    {
+        char delimiter[2] = {recordSeparator, 0};
+        VectorStrok outputRows = SplitString(output, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        VectorStrok expectedRows = SplitString(expected, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
+        ASSERT_EQ(outputRows.size(), expectedRows.size()); // Fatal: rows are indexed pairwise below.
+        // Since there is \n after each row, there will be an extra empty string in both vectors.
+        EXPECT_EQ(outputRows.back(), "");
+        ASSERT_EQ(expectedRows.back(), "");
+        outputRows.pop_back();
+        expectedRows.pop_back();
+
+        TString outputKey;
+        TString expectedKey;
+        TString outputSubkey;
+        TString expectedSubkey;
+        VectorStrok outputValue;
+        VectorStrok expectedValue;
+        for (size_t rowIndex = 0; rowIndex < outputRows.size(); ++rowIndex) {
+            EXPECT_TRUE(ExtractKeySubkeyValue(outputRows[rowIndex], outputKey, outputSubkey, outputValue, fieldSeparator));
+            ASSERT_TRUE(ExtractKeySubkeyValue(expectedRows[rowIndex], expectedKey, expectedSubkey, expectedValue, fieldSeparator));
+            EXPECT_EQ(outputKey, expectedKey);
+            EXPECT_EQ(outputSubkey, expectedSubkey);
+            EXPECT_EQ(outputValue, expectedValue);
+        }
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, Simple)
+{
+    // A single key column; the remaining user columns are expected as name=value entries.
+    Config_->KeyColumnNames.emplace_back("key_a");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row1;
+    row1.AddValue(MakeUnversionedStringValue("a1", KeyAId_));
+    row1.AddValue(MakeUnversionedStringValue("x", ValueXId_));
+    // Null value: the expected output below has no value_y entry for this row.
+    row1.AddValue(MakeUnversionedSentinelValue(EValueType::Null, ValueYId_));
+
+    // Ignore system columns.
+    row1.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
+    row1.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
+    row1.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
+
+    TUnversionedRowBuilder row2;
+    row2.AddValue(MakeUnversionedStringValue("a2", KeyAId_));
+    row2.AddValue(MakeUnversionedStringValue("y", ValueYId_));
+    row2.AddValue(MakeUnversionedStringValue("b", KeyBId_));
+
+    std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(rows));
+    Writer_->Close().Get().ThrowOnError();
+
+    TString expectedOutput =
+        "a1\tvalue_x=x\n"
+        "a2\tvalue_y=y\tkey_b=b\n";
+
+    TString output = OutputStream_.Str();
+    // CompareKeyValue takes (output, expected); keep this order so failure messages name the right side.
+    CompareKeyValue(output, expectedOutput);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, SimpleWithSubkey)
+{
+    Config_->HasSubkey = true;
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->KeyColumnNames.emplace_back("key_b");
+    Config_->SubkeyColumnNames.emplace_back("key_c");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder row1;
+    row1.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+    row1.AddValue(MakeUnversionedStringValue("b1", KeyBId_));
+    row1.AddValue(MakeUnversionedStringValue("c", KeyCId_));
+
+    TUnversionedRowBuilder row2;
+    row2.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+    row2.AddValue(MakeUnversionedStringValue("b2", KeyBId_));
+    row2.AddValue(MakeUnversionedStringValue("c", KeyCId_));
+
+    std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(rows));
+    Writer_->Close().Get().ThrowOnError();
+
+    // Expected rows: space-joined composite key, then the subkey, then an empty value part.
+    TString expectedOutput =
+        "a b1\tc\t\n"
+        "a b2\tc\t\n";
+
+    TString output = OutputStream_.Str();
+
+    // CompareKeySubkeyValue takes (output, expected); keep this order so failure messages name the right side.
+    CompareKeySubkeyValue(output, expectedOutput);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, Lenval)
+{
+ Config_->Lenval = true; // Length-prefixed binary framing instead of tab/newline separated text.
+ Config_->HasSubkey = true;
+ Config_->EnableTableIndex = true;
+ Config_->KeyColumnNames.emplace_back("key_a");
+ Config_->KeyColumnNames.emplace_back("key_b");
+ Config_->SubkeyColumnNames.emplace_back("key_c");
+
+ auto controlAttributes = New<TControlAttributesConfig>(); // All three control attributes are requested; see the marker records in the expected output.
+ controlAttributes->EnableTableIndex = true;
+ controlAttributes->EnableRowIndex = true;
+ controlAttributes->EnableRangeIndex = true;
+ CreateStandardWriter(controlAttributes);
+
+ TUnversionedRowBuilder row1;
+ row1.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+ row1.AddValue(MakeUnversionedStringValue("b1", KeyBId_));
+ row1.AddValue(MakeUnversionedStringValue("c", KeyCId_));
+ row1.AddValue(MakeUnversionedStringValue("x", ValueXId_));
+
+ row1.AddValue(MakeUnversionedInt64Value(42, TableIndexId_)); // 0x2a in the expected table index record.
+ row1.AddValue(MakeUnversionedInt64Value(23, RangeIndexId_)); // 0x17 in the expected range index record.
+ row1.AddValue(MakeUnversionedInt64Value(17, RowIndexId_)); // 0x11 in the expected row index record.
+
+ TUnversionedRowBuilder row2;
+ row2.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+ row2.AddValue(MakeUnversionedStringValue("b2", KeyBId_));
+ row2.AddValue(MakeUnversionedStringValue("c", KeyCId_));
+
+ row2.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
+ row2.AddValue(MakeUnversionedInt64Value(23, RangeIndexId_));
+ row2.AddValue(MakeUnversionedInt64Value(18, RowIndexId_)); // Consecutive to row1's index; the expected output has no second row index record.
+
+ std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
+
+ EXPECT_EQ(true, Writer_->Write(rows));
+ Writer_->Close()
+ .Get()
+ .ThrowOnError();
+
+ TString expectedOutput = TString(
+ "\xff\xff\xff\xff" "\x2a\x00\x00\x00" // Table index.
+ "\xfd\xff\xff\xff" "\x17\x00\x00\x00" // Range index.
+ "\xfc\xff\xff\xff" "\x11\x00\x00\x00\x00\x00\x00\x00" // Row index.
+
+ "\x04\x00\x00\x00" "a b1"
+ "\x01\x00\x00\x00" "c"
+ "\x09\x00\x00\x00" "value_x=x"
+
+ "\x04\x00\x00\x00" "a b2"
+ "\x01\x00\x00\x00" "c"
+ "\x00\x00\x00\x00" "", // Zero-length value: row2 has no columns besides the key and subkey ones.
+
+ 13 * 4 + 4 + 1 + 9 + 4 + 1 + 0); // 13 four-byte words (markers, 32-bit indexes, length prefixes; the 64-bit row index counts as two) plus the payload bytes.
+
+ TString output = OutputStream_.Str();
+ EXPECT_EQ(expectedOutput, output)
+ << "expected length: " << expectedOutput.length()
+ << ", "
+ << "actual length: " << output.length();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, Escaping)
+{
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->KeyColumnNames.emplace_back("key_b");
+    // The column name itself contains a tab, so names are expected to be escaped as well as values.
+    const int escapedNameColumnId = NameTable_->GetIdOrRegisterName("value\t_t");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("a\n", KeyAId_));
+    builder.AddValue(MakeUnversionedStringValue("\nb\t", KeyBId_));
+    builder.AddValue(MakeUnversionedStringValue("\nva\\lue\t", escapedNameColumnId));
+
+    std::vector<TUnversionedRow> rows = {builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(rows));
+    Writer_->Close().Get().ThrowOnError();
+
+    // Newline, tab and backslash are expected in escaped form in keys, names and values alike.
+    const TString expected = "a\\n \\nb\\t\tvalue\\t_t=\\nva\\\\lue\\t\n";
+    const TString actual = OutputStream_.Str();
+
+    EXPECT_EQ(expected, actual);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, SkippedKey)
+{
+    // Two key columns are configured, but the row below provides only key_b.
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->KeyColumnNames.emplace_back("key_b");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("b", KeyBId_));
+
+    std::vector<TUnversionedRow> rows = {builder.GetRow()};
+
+    // Write is expected to report failure, and Close is expected to surface the error.
+    EXPECT_FALSE(Writer_->Write(rows));
+
+    EXPECT_THROW(Writer_->Close().Get().ThrowOnError(), std::exception);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, SkippedSubkey)
+{
+    // A subkey column (key_c) is configured, but the row below provides only the key column.
+    Config_->HasSubkey = true;
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->SubkeyColumnNames.emplace_back("key_c");
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+
+    std::vector<TUnversionedRow> rows = {builder.GetRow()};
+
+    // Write is expected to report failure, and Close is expected to surface the error.
+    EXPECT_FALSE(Writer_->Write(rows));
+
+    EXPECT_THROW(Writer_->Close().Get().ThrowOnError(), std::exception);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, NonStringValues)
+{
+    Config_->HasSubkey = true;
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->SubkeyColumnNames.emplace_back("key_c");
+    CreateStandardWriter();
+
+    // Non-string scalars are placed in the key, subkey and value positions.
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedInt64Value(-42, KeyAId_));
+    builder.AddValue(MakeUnversionedUint64Value(18, KeyCId_));
+    builder.AddValue(MakeUnversionedBooleanValue(true, KeyBId_));
+    builder.AddValue(MakeUnversionedDoubleValue(3.14, ValueXId_));
+    builder.AddValue(MakeUnversionedStringValue("yt", ValueYId_));
+
+    std::vector<TUnversionedRow> rows = {builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(rows));
+    Writer_->Close().Get().ThrowOnError();
+
+    // Every scalar is expected in its plain textual form; the comparison is exact, including entry order.
+    const TString expected = "-42\t18\tkey_b=true\tvalue_x=3.14\tvalue_y=yt\n";
+    const TString actual = OutputStream_.Str();
+
+    EXPECT_EQ(expected, actual);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(TSchemalessWriterForYamredDsvTest, ErasingSubkeyColumnsWhenHasSubkeyIsFalse)
+{
+    Config_->KeyColumnNames.emplace_back("key_a");
+    Config_->SubkeyColumnNames.emplace_back("key_b");
+    // Config->HasSubkey = false by default.
+    CreateStandardWriter();
+
+    TUnversionedRowBuilder builder;
+    builder.AddValue(MakeUnversionedStringValue("a", KeyAId_));
+    builder.AddValue(MakeUnversionedStringValue("b", KeyBId_));
+    builder.AddValue(MakeUnversionedStringValue("c", KeyCId_));
+    builder.AddValue(MakeUnversionedStringValue("x", ValueXId_));
+
+    std::vector<TUnversionedRow> rows = {builder.GetRow()};
+
+    EXPECT_TRUE(Writer_->Write(rows));
+    Writer_->Close().Get().ThrowOnError();
+
+    // key_b is listed as a subkey column, yet no subkey is emitted;
+    // the expected row contains no key_b entry at all, neither as a subkey nor as a value.
+    const TString expected = "a\tkey_c=c\tvalue_x=x\n";
+    const TString actual = OutputStream_.Str();
+
+    EXPECT_EQ(expected, actual);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/unittests/yson_helpers.cpp b/yt/yt/library/formats/unittests/yson_helpers.cpp
new file mode 100644
index 0000000000..669585caf7
--- /dev/null
+++ b/yt/yt/library/formats/unittests/yson_helpers.cpp
@@ -0,0 +1,29 @@
+#include "yson_helpers.h"
+
+#include <yt/yt/core/ytree/convert.h>
+#include <yt/yt/core/ytree/node.h>
+#include <yt/yt/core/yson/string.h>
+
+namespace NYT {
+
+using namespace NYson;
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString CanonizeYson(TStringBuf input)
+{
+ auto node = ConvertToNode(TYsonString(input)); // Parse the input YSON into a YTree node first.
+ auto binaryYson = ConvertToYsonString(node); // Re-serialize the node (default format; presumably binary, per the variable name - confirm).
+
+ TStringStream out;
+ { // Scope the writer so that it is destroyed before out is read below.
+ TYsonWriter writer(&out, NYson::EYsonFormat::Pretty); // The returned text is in the Pretty YSON format.
+ ParseYsonStringBuffer(binaryYson.AsStringBuf(), EYsonType::Node, &writer);
+ }
+ return out.Str();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/yt/library/program/private.h b/yt/yt/library/formats/unittests/yson_helpers.h
index e6e06faf63..d123d40447 100644
--- a/yt/yt/library/program/private.h
+++ b/yt/yt/library/formats/unittests/yson_helpers.h
@@ -1,14 +1,12 @@
#pragma once
-#include "public.h"
-
-#include <yt/yt/core/logging/log.h>
+#include <util/generic/string.h>
namespace NYT {
////////////////////////////////////////////////////////////////////////////////
-YT_DEFINE_GLOBAL(const NLogging::TLogger, ProgramLogger, "Program");
+TString CanonizeYson(TStringBuf yson);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/monitoring/http_integration.cpp b/yt/yt/library/monitoring/http_integration.cpp
deleted file mode 100644
index 25fe9ad304..0000000000
--- a/yt/yt/library/monitoring/http_integration.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-#include "http_integration.h"
-
-#include "monitoring_manager.h"
-
-#include <yt/yt/build/build.h>
-
-#include <yt/yt/core/json/config.h>
-#include <yt/yt/core/json/json_writer.h>
-
-#include <yt/yt/core/ytree/fluent.h>
-
-#include <yt/yt/core/yson/parser.h>
-#include <yt/yt/core/yson/consumer.h>
-
-#include <yt/yt/core/concurrency/scheduler.h>
-
-#include <yt/yt/core/ytree/helpers.h>
-#include <yt/yt/core/ytree/virtual.h>
-#include <yt/yt/core/ytree/ypath_detail.h>
-#include <yt/yt/core/ytree/ypath_proxy.h>
-
-#include <yt/yt/core/http/http.h>
-#include <yt/yt/core/http/helpers.h>
-#include <yt/yt/core/http/server.h>
-
-#include <yt/yt/core/bus/tcp/dispatcher.h>
-
-#include <yt/yt/core/misc/ref_counted_tracker_statistics_producer.h>
-
-#include <yt/yt/library/profiling/solomon/exporter.h>
-
-#ifdef _linux_
-#include <yt/yt/library/ytprof/http/handler.h>
-#include <yt/yt/library/ytprof/build_info.h>
-
-#include <yt/yt/library/backtrace_introspector/http/handler.h>
-#endif
-
-#include <library/cpp/cgiparam/cgiparam.h>
-
-#include <util/string/vector.h>
-
-namespace NYT::NMonitoring {
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NHttp;
-using namespace NConcurrency;
-using namespace NJson;
-
-////////////////////////////////////////////////////////////////////////////////
-
-DEFINE_ENUM(EVerb,
- (Get)
- (List)
-);
-
-////////////////////////////////////////////////////////////////////////////////
-
-void Initialize(
- const NHttp::IServerPtr& monitoringServer,
- const NProfiling::TSolomonExporterConfigPtr& config,
- TMonitoringManagerPtr* monitoringManager,
- NYTree::IMapNodePtr* orchidRoot)
-{
- *monitoringManager = New<TMonitoringManager>();
- (*monitoringManager)->Register("/ref_counted", CreateRefCountedTrackerStatisticsProducer());
- (*monitoringManager)->Register("/solomon", BIND([] (NYson::IYsonConsumer* consumer) {
- auto tags = NProfiling::TSolomonRegistry::Get()->GetDynamicTags();
-
- BuildYsonFluently(consumer)
- .BeginMap()
- .Item("dynamic_tags").Value(THashMap<TString, TString>(tags.begin(), tags.end()))
- .EndMap();
- }));
- (*monitoringManager)->Start();
-
- *orchidRoot = NYTree::GetEphemeralNodeFactory(true)->CreateMap();
- SetNodeByYPath(
- *orchidRoot,
- "/monitoring",
- CreateVirtualNode((*monitoringManager)->GetService()));
- SetNodeByYPath(
- *orchidRoot,
- "/tcp_dispatcher",
- CreateVirtualNode(NYT::NBus::TTcpDispatcher::Get()->GetOrchidService()));
-
-#ifdef _linux_
- auto buildInfo = NYTProf::TBuildInfo::GetDefault();
- buildInfo.BinaryVersion = GetVersion();
-
- SetNodeByYPath(
- *orchidRoot,
- "/build_info",
- NYTree::BuildYsonNodeFluently()
- .BeginMap()
- .Item("arc_revision").Value(buildInfo.ArcRevision)
- .Item("binary_version").Value(buildInfo.BinaryVersion)
- .Item("build_type").Value(buildInfo.BuildType)
- .EndMap());
-#endif
-
- if (monitoringServer) {
- auto exporter = New<NProfiling::TSolomonExporter>(config);
- exporter->Register("/solomon", monitoringServer);
- exporter->Start();
-
- SetNodeByYPath(
- *orchidRoot,
- "/sensors",
- CreateVirtualNode(exporter->GetSensorService()));
-
-#ifdef _linux_
- NYTProf::Register(monitoringServer, "/ytprof", buildInfo);
- NBacktraceIntrospector::Register(monitoringServer, "/backtrace");
-#endif
- monitoringServer->AddHandler(
- "/orchid/",
- GetOrchidYPathHttpHandler(*orchidRoot));
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TYPathHttpHandler
- : public IHttpHandler
-{
-public:
- explicit TYPathHttpHandler(IYPathServicePtr service)
- : Service_(std::move(service))
- { }
-
- void HandleRequest(
- const IRequestPtr& req,
- const IResponseWriterPtr& rsp) override
- {
- const TStringBuf orchidPrefix = "/orchid";
-
- TString path{req->GetUrl().Path};
- if (!path.StartsWith(orchidPrefix)) {
- THROW_ERROR_EXCEPTION("HTTP request must start with %Qv prefix",
- orchidPrefix)
- << TErrorAttribute("path", path);
- }
-
- path = path.substr(orchidPrefix.size(), TString::npos);
- TCgiParameters params(req->GetUrl().RawQuery);
-
- auto verb = EVerb::Get;
-
- auto options = CreateEphemeralAttributes();
- for (const auto& param : params) {
- if (param.first == "verb") {
- verb = ParseEnum<EVerb>(param.second);
- } else {
- // Just a check, IAttributeDictionary takes raw YSON anyway.
- try {
- ValidateYson(TYsonString(param.second), DefaultYsonParserNestingLevelLimit);
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing value of query parameter %Qv",
- param.first)
- << ex;
- }
-
- options->SetYson(param.first, TYsonString(param.second));
- }
- }
-
- TYsonString result;
- switch (verb) {
- case EVerb::Get: {
- auto ypathReq = TYPathProxy::Get(path);
- ToProto(ypathReq->mutable_options(), *options);
- auto ypathRsp = WaitFor(ExecuteVerb(Service_, ypathReq))
- .ValueOrThrow();
- result = TYsonString(ypathRsp->value());
- break;
- }
- case EVerb::List: {
- auto ypathReq = TYPathProxy::List(path);
- auto ypathRsp = WaitFor(ExecuteVerb(Service_, ypathReq))
- .ValueOrThrow();
- result = TYsonString(ypathRsp->value());
- break;
- }
- default:
- YT_ABORT();
- }
-
- rsp->SetStatus(EStatusCode::OK);
- NHttp::ReplyJson(rsp, [&] (NYson::IYsonConsumer* writer) {
- Serialize(result, writer);
- });
- WaitFor(rsp->Close())
- .ThrowOnError();
- }
-
-private:
- const IYPathServicePtr Service_;
-};
-
-IHttpHandlerPtr GetOrchidYPathHttpHandler(const IYPathServicePtr& service)
-{
- return WrapYTException(New<TYPathHttpHandler>(service));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NMonitoring
diff --git a/yt/yt/library/monitoring/http_integration.h b/yt/yt/library/monitoring/http_integration.h
deleted file mode 100644
index 48c12ca8a8..0000000000
--- a/yt/yt/library/monitoring/http_integration.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/ytree/ypath_service.h>
-
-#include <yt/yt/core/http/public.h>
-
-#include <yt/yt/library/profiling/solomon/public.h>
-
-namespace NYT::NMonitoring {
-
-////////////////////////////////////////////////////////////////////////////////
-
-void Initialize(
- const NHttp::IServerPtr& monitoringServer,
- const NProfiling::TSolomonExporterConfigPtr& solomonExporterConfig,
- TMonitoringManagerPtr* monitoringManager,
- NYTree::IMapNodePtr* orchidRoot);
-
-NHttp::IHttpHandlerPtr CreateTracingHttpHandler();
-
-NHttp::IHttpHandlerPtr GetOrchidYPathHttpHandler(
- const NYTree::IYPathServicePtr& service);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NMonitoring
diff --git a/yt/yt/library/monitoring/monitoring_manager.cpp b/yt/yt/library/monitoring/monitoring_manager.cpp
deleted file mode 100644
index 263443060b..0000000000
--- a/yt/yt/library/monitoring/monitoring_manager.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-#include "monitoring_manager.h"
-#include "private.h"
-
-#include <yt/yt/core/concurrency/action_queue.h>
-#include <yt/yt/core/concurrency/periodic_executor.h>
-
-#include <yt/yt/core/ytree/convert.h>
-#include <yt/yt/core/ytree/ephemeral_node_factory.h>
-#include <yt/yt/core/ytree/node.h>
-#include <yt/yt/core/ytree/tree_visitor.h>
-#include <yt/yt/core/ytree/ypath_detail.h>
-#include <yt/yt/core/ytree/ypath_client.h>
-
-#include <yt/yt/library/profiling/sensor.h>
-
-namespace NYT::NMonitoring {
-
-using namespace NYTree;
-using namespace NYPath;
-using namespace NYson;
-using namespace NConcurrency;
-
-////////////////////////////////////////////////////////////////////////////////
-
-static constexpr auto& Logger = MonitoringLogger;
-
-static const auto UpdatePeriod = TDuration::Seconds(3);
-static const auto EmptyRoot = GetEphemeralNodeFactory()->CreateMap();
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TMonitoringManager::TImpl
- : public TRefCounted
-{
-public:
- void Register(const TYPath& path, TYsonProducer producer)
- {
- auto guard = Guard(SpinLock_);
- YT_VERIFY(PathToProducer_.emplace(path, producer).second);
- }
-
- void Unregister(const TYPath& path)
- {
- auto guard = Guard(SpinLock_);
- YT_VERIFY(PathToProducer_.erase(path) == 1);
- }
-
- IYPathServicePtr GetService()
- {
- return New<TYPathService>(this);
- }
-
- void Start()
- {
- auto guard = Guard(SpinLock_);
-
- YT_VERIFY(!Started_);
-
- PeriodicExecutor_ = New<TPeriodicExecutor>(
- ActionQueue_->GetInvoker(),
- BIND(&TImpl::Update, MakeWeak(this)),
- UpdatePeriod);
- PeriodicExecutor_->Start();
-
- Started_ = true;
- }
-
- void Stop()
- {
- auto guard = Guard(SpinLock_);
-
- if (!Started_)
- return;
-
- Started_ = false;
- YT_UNUSED_FUTURE(PeriodicExecutor_->Stop());
- Root_.Reset();
- }
-
-private:
- class TYPathService
- : public TYPathServiceBase
- {
- public:
- explicit TYPathService(TIntrusivePtr<TImpl> owner)
- : Owner_(std::move(owner))
- { }
-
- TResolveResult Resolve(const TYPath& path, const IYPathServiceContextPtr& /*context*/) override
- {
- return TResolveResultThere{Owner_->GetRoot(), path};
- }
-
- private:
- const TIntrusivePtr<TImpl> Owner_;
-
- };
-
- bool Started_ = false;
- TActionQueuePtr ActionQueue_ = New<TActionQueue>("Monitoring");
- TPeriodicExecutorPtr PeriodicExecutor_;
-
- YT_DECLARE_SPIN_LOCK(NThreading::TSpinLock, SpinLock_);
- THashMap<TString, NYson::TYsonProducer> PathToProducer_;
- IMapNodePtr Root_;
-
- void Update()
- {
- YT_LOG_DEBUG("Started updating monitoring state");
-
- YT_PROFILE_TIMING("/monitoring/update_time") {
- auto newRoot = GetEphemeralNodeFactory()->CreateMap();
-
- THashMap<TString, NYson::TYsonProducer> pathToProducer;;
- {
- auto guard = Guard(SpinLock_);
- pathToProducer = PathToProducer_;
- }
-
- for (const auto& [path, producer] : pathToProducer) {
- auto value = ConvertToYsonString(producer);
- SyncYPathSet(newRoot, path, value);
- }
-
- if (Started_) {
- auto guard = Guard(SpinLock_);
- std::swap(Root_, newRoot);
- }
- }
- YT_LOG_DEBUG("Finished updating monitoring state");
- }
-
- IMapNodePtr GetRoot()
- {
- auto guard = Guard(SpinLock_);
- return Root_ ? Root_ : EmptyRoot;
- }
-};
-
-DEFINE_REFCOUNTED_TYPE(TMonitoringManager)
-
-////////////////////////////////////////////////////////////////////////////////
-
-TMonitoringManager::TMonitoringManager()
- : Impl_(New<TImpl>())
-{ }
-
-TMonitoringManager::~TMonitoringManager() = default;
-
-void TMonitoringManager::Register(const TYPath& path, TYsonProducer producer)
-{
- Impl_->Register(path, producer);
-}
-
-void TMonitoringManager::Unregister(const TYPath& path)
-{
- Impl_->Unregister(path);
-}
-
-IYPathServicePtr TMonitoringManager::GetService()
-{
- return Impl_->GetService();
-}
-
-void TMonitoringManager::Start()
-{
- Impl_->Start();
-}
-
-void TMonitoringManager::Stop()
-{
- Impl_->Stop();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NMonitoring
diff --git a/yt/yt/library/monitoring/monitoring_manager.h b/yt/yt/library/monitoring/monitoring_manager.h
deleted file mode 100644
index b2582bbe70..0000000000
--- a/yt/yt/library/monitoring/monitoring_manager.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/yson/consumer.h>
-#include <yt/yt/core/yson/producer.h>
-
-#include <yt/yt/core/ypath/public.h>
-
-#include <yt/yt/core/ytree/public.h>
-
-namespace NYT::NMonitoring {
-
-////////////////////////////////////////////////////////////////////////////////
-
-//! Exposes a tree assembled from results returned by a set of
-//! registered NYson::TYsonProducer-s.
-/*!
- * \note
- * The results are cached and periodically updated.
- */
-class TMonitoringManager
- : public TRefCounted
-{
-public:
- TMonitoringManager();
- ~TMonitoringManager();
-
- //! Registers a new #producer for a given #path.
- void Register(const NYPath::TYPath& path, NYson::TYsonProducer producer);
-
- //! Unregisters an existing producer for the specified #path.
- void Unregister(const NYPath::TYPath& path);
-
- //! Returns the service representing the whole tree.
- /*!
- * \note The service is thread-safe.
- */
- NYTree::IYPathServicePtr GetService();
-
- //! Starts periodic updates.
- void Start();
-
- //! Stops periodic updates.
- void Stop();
-
-private:
- class TImpl;
- TIntrusivePtr<TImpl> Impl_;
-
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NMonitoring
diff --git a/yt/yt/library/monitoring/private.h b/yt/yt/library/monitoring/private.h
deleted file mode 100644
index 61809bdb68..0000000000
--- a/yt/yt/library/monitoring/private.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/logging/log.h>
-
-namespace NYT::NMonitoring {
-
-////////////////////////////////////////////////////////////////////////////////
-
-YT_DEFINE_GLOBAL(const NLogging::TLogger, MonitoringLogger, "Monitoring");
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NJournalClient
diff --git a/yt/yt/library/monitoring/public.h b/yt/yt/library/monitoring/public.h
deleted file mode 100644
index 3514bdd858..0000000000
--- a/yt/yt/library/monitoring/public.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include <yt/yt/core/misc/public.h>
-
-namespace NYT::NMonitoring {
-
-////////////////////////////////////////////////////////////////////////////////
-
-DECLARE_REFCOUNTED_CLASS(TMonitoringManager)
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NMonitoring
diff --git a/yt/yt/library/monitoring/ya.make b/yt/yt/library/monitoring/ya.make
deleted file mode 100644
index c2fccd99ac..0000000000
--- a/yt/yt/library/monitoring/ya.make
+++ /dev/null
@@ -1,27 +0,0 @@
-LIBRARY()
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- http_integration.cpp
- monitoring_manager.cpp
-)
-
-PEERDIR(
- yt/yt/core
- yt/yt/build
- yt/yt/library/profiling
- yt/yt/library/profiling/solomon
- library/cpp/cgiparam
-)
-
-IF (OS_LINUX)
- PEERDIR(
- yt/yt/library/ytprof
- yt/yt/library/ytprof/http
-
- yt/yt/library/backtrace_introspector/http
- )
-ENDIF()
-
-END()
diff --git a/yt/yt/library/oom/oom.cpp b/yt/yt/library/oom/oom.cpp
deleted file mode 100644
index 56714260ce..0000000000
--- a/yt/yt/library/oom/oom.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-#include "oom.h"
-
-#include <thread>
-#include <mutex>
-
-#include <yt/yt/core/misc/proc.h>
-#include <yt/yt/core/misc/ref_counted_tracker.h>
-
-#include <library/cpp/yt/assert/assert.h>
-#include <library/cpp/yt/logging/logger.h>
-
-#include <yt/yt/library/ytprof/heap_profiler.h>
-#include <yt/yt/library/ytprof/profile.h>
-
-#include <util/datetime/base.h>
-#include <util/system/file.h>
-#include <util/stream/output.h>
-#include <util/stream/file.h>
-#include <util/string/split.h>
-#include <util/system/fs.h>
-
-namespace NYT {
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-YT_DEFINE_GLOBAL(const NYT::NLogging::TLogger, Logger, "OOM");
-
-const char* TCMallocStats[] = {
- "tcmalloc.per_cpu_caches_active",
- "generic.virtual_memory_used",
- "generic.physical_memory_used",
- "generic.bytes_in_use_by_app",
- "generic.heap_size",
- "tcmalloc.central_cache_free",
- "tcmalloc.cpu_free",
- "tcmalloc.page_heap_free",
- "tcmalloc.page_heap_unmapped",
- "tcmalloc.page_algorithm",
- "tcmalloc.max_total_thread_cache_bytes",
- "tcmalloc.thread_cache_free",
- "tcmalloc.thread_cache_count",
- "tcmalloc.local_bytes",
- "tcmalloc.external_fragmentation_bytes",
- "tcmalloc.metadata_bytes",
- "tcmalloc.transfer_cache_free",
- "tcmalloc.hard_usage_limit_bytes",
- "tcmalloc.desired_usage_limit_bytes",
- "tcmalloc.required_bytes",
-};
-
-void OomWatchdog(TOomWatchdogOptions options)
-{
- while (true) {
- auto rss = GetProcessMemoryUsage().Rss;
-
- if (options.MemoryLimit && static_cast<i64>(rss) > *options.MemoryLimit) {
- auto profile = NYTProf::CaptureHeapProfile(tcmalloc::ProfileType::kHeap);
-
- TFileOutput output(options.HeapDumpPath);
- NYTProf::WriteCompressedProfile(&output, profile);
- output.Finish();
-
- auto rctDump = TRefCountedTracker::Get()->GetDebugInfo();
- for (const auto& line : StringSplitter(rctDump).Split('\n')) {
- YT_LOG_DEBUG("RCT %v", line.Token());
- }
-
- auto parseMemoryAmount = [] (const TStringBuf strValue) {
- const TStringBuf kbSuffix = " kB";
- YT_VERIFY(strValue.EndsWith(kbSuffix));
- auto startPos = strValue.find_first_not_of(' ');
- auto valueString = strValue.substr(
- startPos,
- strValue.size() - kbSuffix.size() - startPos);
- return FromString<ui64>(valueString) * 1_KB;
- };
-
- ui64 rssAnon = 0;
- ui64 rssFile = 0;
- ui64 rssShmem = 0;
-
- TFileInput statusFile(Format("/proc/self/status"));
- TString line;
- while (statusFile.ReadLine(line)) {
- const TStringBuf rssAnonHeader = "RssAnon:\t";
- if (line.StartsWith(rssAnonHeader)) {
- rssAnon = parseMemoryAmount(line.substr(rssAnonHeader.size()));
- continue;
- }
-
- const TStringBuf rssFileHeader = "RssFile:\t";
- if (line.StartsWith(rssFileHeader)) {
- rssFile = parseMemoryAmount(line.substr(rssFileHeader.size()));
- continue;
- }
-
- const TStringBuf rssShmemHeader = "RssShmem:\t";
- if (line.StartsWith(rssShmemHeader)) {
- rssShmem = parseMemoryAmount(line.substr(rssShmemHeader.size()));
- continue;
- }
- }
-
- YT_LOG_DEBUG("Memory statistis (RssTotal: %v, RssAnon: %v, RssFile %v, RssShmem: %v, TCMalloc: %v)",
- rss,
- rssAnon,
- rssFile,
- rssShmem,
- MakeFormattableView(
- TRange(TCMallocStats),
- [&] (auto* builder, auto metric) {
- auto value = tcmalloc::MallocExtension::GetNumericProperty(metric);
- builder->AppendFormat("%v: %v", metric, value);
- }));
-
- YT_LOG_FATAL("Early OOM triggered (MemoryUsage: %v, MemoryLimit: %v, HeapDump: %v, CurrentWorkingDirectory: %v)",
- rss,
- *options.MemoryLimit,
- options.HeapDumpPath,
- NFs::CurrentWorkingDirectory());
- }
-
- Sleep(TDuration::MilliSeconds(10));
- }
-}
-
-} // namespace
-
-////////////////////////////////////////////////////////////////////////////////
-
-void EnableEarlyOomWatchdog(TOomWatchdogOptions options)
-{
- static std::once_flag onceFlag;
-
- std::call_once(onceFlag, [options] {
- std::thread(OomWatchdog, options).detach();
- });
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/oom/oom.h b/yt/yt/library/oom/oom.h
deleted file mode 100644
index 7a5892918a..0000000000
--- a/yt/yt/library/oom/oom.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-
-#include <optional>
-
-#include <util/generic/string.h>
-
-namespace NYT {
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TOomWatchdogOptions
-{
- std::optional<i64> MemoryLimit;
- TString HeapDumpPath = "oom.pb.gz";
-};
-
-void EnableEarlyOomWatchdog(TOomWatchdogOptions options);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/oom/unittests/oom_ut.cpp b/yt/yt/library/oom/unittests/oom_ut.cpp
deleted file mode 100644
index 78f0182973..0000000000
--- a/yt/yt/library/oom/unittests/oom_ut.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <yt/yt/library/oom/oom.h>
-
-#include <util/datetime/base.h>
-#include <util/system/fs.h>
-#include <util/generic/size_literals.h>
-
-namespace NYT {
-namespace {
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TEarlyOomTest, Crash)
-{
- auto checkOom = [] {
- EnableEarlyOomWatchdog(TOomWatchdogOptions{
- .MemoryLimit = 0,
- });
-
- Sleep(TDuration::Seconds(5));
- };
-
- ASSERT_DEATH(checkOom(), "");
-
- ASSERT_TRUE(NFs::Exists("oom.pb.gz"));
-}
-
-TEST(TEarlyOomTest, NoCrash)
-{
- EnableEarlyOomWatchdog(TOomWatchdogOptions{
- .MemoryLimit = 1_GB,
- });
-
- Sleep(TDuration::Seconds(5));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT
diff --git a/yt/yt/library/oom/ya.make b/yt/yt/library/oom/ya.make
deleted file mode 100644
index f4845495d8..0000000000
--- a/yt/yt/library/oom/ya.make
+++ /dev/null
@@ -1,20 +0,0 @@
-LIBRARY()
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- oom.cpp
-)
-
-PEERDIR(
- yt/yt/core
- yt/yt/library/ytprof
-)
-
-END()
-
-IF (OS_LINUX AND NOT SANITIZER_TYPE)
- RECURSE(
- unittests
- )
-ENDIF()
diff --git a/yt/yt/library/process/config.cpp b/yt/yt/library/process/config.cpp
new file mode 100644
index 0000000000..9099aca7f0
--- /dev/null
+++ b/yt/yt/library/process/config.cpp
@@ -0,0 +1,34 @@
+#include "config.h"
+
+namespace NYT::NPipes {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TIODispatcherConfig::Register(TRegistrar registrar)
+{
+ registrar.Parameter("thread_pool_polling_period", &TThis::ThreadPoolPollingPeriod)
+ .Default(TDuration::MilliSeconds(10));
+}
+
+TIODispatcherConfigPtr TIODispatcherConfig::ApplyDynamic(
+ const TIODispatcherDynamicConfigPtr& dynamicConfig) const
+{
+ auto mergedConfig = CloneYsonStruct(MakeStrong(this));
+ UpdateYsonStructField(mergedConfig->ThreadPoolPollingPeriod, dynamicConfig->ThreadPoolPollingPeriod);
+ mergedConfig->Postprocess();
+ return mergedConfig;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TIODispatcherDynamicConfig::Register(TRegistrar registrar)
+{
+ registrar.Parameter("thread_pool_polling_period", &TThis::ThreadPoolPollingPeriod)
+ .Optional();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NPipes
diff --git a/yt/yt/library/process/config.h b/yt/yt/library/process/config.h
new file mode 100644
index 0000000000..84be0ef7be
--- /dev/null
+++ b/yt/yt/library/process/config.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "public.h"
+
+#include <yt/yt/core/ytree/yson_struct.h>
+
+namespace NYT::NPipes {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TIODispatcherConfig
+ : public NYTree::TYsonStruct
+{
+public:
+ TDuration ThreadPoolPollingPeriod;
+
+ TIODispatcherConfigPtr ApplyDynamic(const TIODispatcherDynamicConfigPtr& dynamicConfig) const;
+
+ REGISTER_YSON_STRUCT(TIODispatcherConfig);
+
+ static void Register(TRegistrar registrar);
+};
+
+DEFINE_REFCOUNTED_TYPE(TIODispatcherConfig)
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TIODispatcherDynamicConfig
+ : public NYTree::TYsonStruct
+{
+public:
+ std::optional<TDuration> ThreadPoolPollingPeriod;
+
+ REGISTER_YSON_STRUCT(TIODispatcherDynamicConfig);
+
+ static void Register(TRegistrar registrar);
+};
+
+DEFINE_REFCOUNTED_TYPE(TIODispatcherDynamicConfig)
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NPipes
diff --git a/yt/yt/library/process/configure_io_dispatcher.cpp b/yt/yt/library/process/configure_io_dispatcher.cpp
new file mode 100644
index 0000000000..d2f834b0f2
--- /dev/null
+++ b/yt/yt/library/process/configure_io_dispatcher.cpp
@@ -0,0 +1,41 @@
+#include "io_dispatcher.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NPipes {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TIODispatcherConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TIODispatcherDynamicConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void ConfigureSingleton(const TIODispatcherConfigPtr& config)
+{
+ TIODispatcher::Get()->Configure(config);
+}
+
+void ReconfigureSingleton(
+ const TIODispatcherConfigPtr& config,
+ const TIODispatcherDynamicConfigPtr& dynamicConfig)
+{
+ TIODispatcher::Get()->Configure(config->ApplyDynamic(dynamicConfig));
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "io_dispatcher",
+ TIODispatcherConfig,
+ TIODispatcherDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NPipes
diff --git a/yt/yt/library/process/io_dispatcher.cpp b/yt/yt/library/process/io_dispatcher.cpp
index c6e7e2f67f..96e1f88087 100644
--- a/yt/yt/library/process/io_dispatcher.cpp
+++ b/yt/yt/library/process/io_dispatcher.cpp
@@ -1,5 +1,7 @@
#include "io_dispatcher.h"
+#include "config.h"
+
#include <yt/yt/core/concurrency/thread_pool_poller.h>
#include <yt/yt/core/concurrency/poller.h>
@@ -9,14 +11,6 @@ using namespace NConcurrency;
////////////////////////////////////////////////////////////////////////////////
-void TIODispatcherConfig::Register(TRegistrar registrar)
-{
- registrar.Parameter("thread_pool_polling_period", &TThis::ThreadPoolPollingPeriod)
- .Default(TDuration::MilliSeconds(10));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
TIODispatcher::TIODispatcher()
: Poller_(BIND([] { return CreateThreadPoolPoller(1, "Pipes"); }))
{ }
diff --git a/yt/yt/library/process/io_dispatcher.h b/yt/yt/library/process/io_dispatcher.h
index 32fd92f0ac..3c47bddf78 100644
--- a/yt/yt/library/process/io_dispatcher.h
+++ b/yt/yt/library/process/io_dispatcher.h
@@ -12,32 +12,15 @@ namespace NYT::NPipes {
////////////////////////////////////////////////////////////////////////////////
-class TIODispatcherConfig
- : public NYTree::TYsonStruct
-{
-public:
- TDuration ThreadPoolPollingPeriod;
-
- REGISTER_YSON_STRUCT(TIODispatcherConfig);
-
- static void Register(TRegistrar registrar);
-};
-
-DEFINE_REFCOUNTED_TYPE(TIODispatcherConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
class TIODispatcher
{
public:
- ~TIODispatcher();
-
static TIODispatcher* Get();
+ ~TIODispatcher();
void Configure(const TIODispatcherConfigPtr& config);
IInvokerPtr GetInvoker();
-
NConcurrency::IPollerPtr GetPoller();
private:
diff --git a/yt/yt/library/process/public.h b/yt/yt/library/process/public.h
index fd4193f80d..76cfff1340 100644
--- a/yt/yt/library/process/public.h
+++ b/yt/yt/library/process/public.h
@@ -1,6 +1,8 @@
#pragma once
-#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
+#include <library/cpp/yt/memory/ref_counted.h>
namespace NYT::NPipes {
@@ -10,6 +12,10 @@ DECLARE_REFCOUNTED_CLASS(TNamedPipe)
DECLARE_REFCOUNTED_CLASS(TNamedPipeConfig)
DECLARE_REFCOUNTED_CLASS(TIODispatcherConfig)
+DECLARE_REFCOUNTED_CLASS(TIODispatcherDynamicConfig)
+
+
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TIODispatcherConfig, TIODispatcherDynamicConfig);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/process/unittests/pipes_ut.cpp b/yt/yt/library/process/unittests/pipes_ut.cpp
deleted file mode 100644
index f0c371dd30..0000000000
--- a/yt/yt/library/process/unittests/pipes_ut.cpp
+++ /dev/null
@@ -1,432 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/concurrency/action_queue.h>
-#include <yt/yt/core/concurrency/scheduler.h>
-
-#include <yt/yt/core/misc/blob.h>
-#include <yt/yt/core/misc/proc.h>
-
-#include <yt/yt/core/net/connection.h>
-
-#include <yt/yt/library/process/pipe.h>
-
-#include <random>
-
-namespace NYT::NPipes {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NConcurrency;
-using namespace NNet;
-
-#ifndef _win_
-
-//! NB: You can't set size smaller than that of a page.
-constexpr int SmallPipeCapacity = 4096;
-
-TEST(TPipeIOHolder, CanInstantiate)
-{
- auto pipe = TPipeFactory().Create();
-
- auto readerHolder = pipe.CreateAsyncReader();
- auto writerHolder = pipe.CreateAsyncWriter();
-
- readerHolder->Abort().Get();
- writerHolder->Abort().Get();
-}
-
-TEST(TPipeTest, PrematureEOF)
-{
- auto pipe = TNamedPipe::Create("./namedpipe");
- auto reader = pipe->CreateAsyncReader();
-
- auto buffer = TSharedMutableRef::Allocate(1024 * 1024);
- EXPECT_THROW(reader->Read(buffer).WithTimeout(TDuration::Seconds(1)).Get().ValueOrThrow(), TErrorException);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TBlob ReadAll(IConnectionReaderPtr reader, bool useWaitFor)
-{
- auto buffer = TSharedMutableRef::Allocate(1_MB, {.InitializeStorage = false});
- auto whole = TBlob(GetRefCountedTypeCookie<TDefaultBlobTag>());
-
- while (true) {
- TErrorOr<size_t> result;
- auto future = reader->Read(buffer);
- if (useWaitFor) {
- result = WaitFor(future);
- } else {
- result = future.Get();
- }
-
- if (result.ValueOrThrow() == 0) {
- break;
- }
-
- whole.Append(buffer.Begin(), result.Value());
- }
- return whole;
-}
-
-void WriteAll(IConnectionWriterPtr writer, const char* data, size_t size, size_t blockSize)
-{
- while (size > 0) {
- const size_t currentBlockSize = std::min(blockSize, size);
- auto buffer = TSharedRef(data, currentBlockSize, nullptr);
- auto error = WaitFor(writer->Write(buffer));
- THROW_ERROR_EXCEPTION_IF_FAILED(error);
- size -= currentBlockSize;
- data += currentBlockSize;
- }
-
- {
- auto error = WaitFor(writer->Close());
- THROW_ERROR_EXCEPTION_IF_FAILED(error);
- }
-}
-
-TEST(TAsyncWriterTest, AsyncCloseFail)
-{
- auto pipe = TPipeFactory().Create();
-
- auto reader = pipe.CreateAsyncReader();
- auto writer = pipe.CreateAsyncWriter();
-
- auto queue = New<NConcurrency::TActionQueue>();
- auto readFromPipe =
- BIND(&ReadAll, reader, false)
- .AsyncVia(queue->GetInvoker())
- .Run();
-
- int length = 200*1024;
- auto buffer = TSharedMutableRef::Allocate(length);
- ::memset(buffer.Begin(), 'a', buffer.Size());
-
- auto writeResult = writer->Write(buffer).Get();
-
- EXPECT_TRUE(writeResult.IsOK())
- << ToString(writeResult);
-
- auto error = writer->Close();
-
- auto readResult = readFromPipe.Get();
- ASSERT_TRUE(readResult.IsOK())
- << ToString(readResult);
-
- auto closeStatus = error.Get();
-}
-
-TEST(TAsyncWriterTest, WriteFailed)
-{
- auto pipe = TPipeFactory().Create();
- auto reader = pipe.CreateAsyncReader();
- auto writer = pipe.CreateAsyncWriter();
-
- int length = 200*1024;
- auto buffer = TSharedMutableRef::Allocate(length);
- ::memset(buffer.Begin(), 'a', buffer.Size());
-
- auto asyncWriteResult = writer->Write(buffer);
- YT_UNUSED_FUTURE(reader->Abort());
-
- EXPECT_FALSE(asyncWriteResult.Get().IsOK())
- << ToString(asyncWriteResult.Get());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TPipeReadWriteTest
- : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- auto pipe = TPipeFactory().Create();
-
- Reader = pipe.CreateAsyncReader();
- Writer = pipe.CreateAsyncWriter();
- }
-
- void TearDown() override
- { }
-
- IConnectionReaderPtr Reader;
- IConnectionWriterPtr Writer;
-};
-
-class TNamedPipeReadWriteTest
- : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- auto pipe = TNamedPipe::Create("./namedpipe");
- Reader = pipe->CreateAsyncReader();
- Writer = pipe->CreateAsyncWriter();
- }
-
- void TearDown() override
- { }
-
- void SetUpWithCapacity(int capacity)
- {
- auto pipe = TNamedPipe::Create("./namedpipewcap", 0660, capacity);
- Reader = pipe->CreateAsyncReader();
- Writer = pipe->CreateAsyncWriter();
- }
-
- void SetUpWithDeliveryFence()
- {
- auto pipe = TNamedPipe::Create("./namedpipewcap", 0660);
- Reader = pipe->CreateAsyncReader();
- Writer = pipe->CreateAsyncWriter(/*useDeliveryFence*/ true);
- }
-
- IConnectionReaderPtr Reader;
- IConnectionWriterPtr Writer;
-};
-
-TEST_F(TPipeReadWriteTest, ReadSomethingSpin)
-{
- TString message("Hello pipe!\n");
- auto buffer = TSharedRef::FromString(message);
- Writer->Write(buffer).Get().ThrowOnError();
- Writer->Close().Get().ThrowOnError();
-
- auto data = TSharedMutableRef::Allocate(1);
- auto whole = TBlob(GetRefCountedTypeCookie<TDefaultBlobTag>());
-
- while (true) {
- auto result = Reader->Read(data).Get();
- if (result.ValueOrThrow() == 0) {
- break;
- }
- whole.Append(data.Begin(), result.Value());
- }
-
- EXPECT_EQ(message, TString(whole.Begin(), whole.End()));
-}
-
-TEST_F(TNamedPipeReadWriteTest, ReadSomethingSpin)
-{
- TString message("Hello pipe!\n");
- auto buffer = TSharedRef::FromString(message);
-
- Writer->Write(buffer).Get().ThrowOnError();
- Writer->Close().Get().ThrowOnError();
-
- auto data = TSharedMutableRef::Allocate(1);
- auto whole = TBlob(GetRefCountedTypeCookie<TDefaultBlobTag>());
-
- while (true) {
- auto result = Reader->Read(data).Get();
- if (result.ValueOrThrow() == 0) {
- break;
- }
- whole.Append(data.Begin(), result.Value());
- }
- EXPECT_EQ(message, TString(whole.Begin(), whole.End()));
-}
-
-
-TEST_F(TPipeReadWriteTest, ReadSomethingWait)
-{
- TString message("Hello pipe!\n");
- auto buffer = TSharedRef::FromString(message);
- EXPECT_TRUE(Writer->Write(buffer).Get().IsOK());
- WaitFor(Writer->Close())
- .ThrowOnError();
- auto whole = ReadAll(Reader, false);
- EXPECT_EQ(message, TString(whole.Begin(), whole.End()));
-}
-
-TEST_F(TNamedPipeReadWriteTest, ReadSomethingWait)
-{
- TString message("Hello pipe!\n");
- auto buffer = TSharedRef::FromString(message);
- EXPECT_TRUE(Writer->Write(buffer).Get().IsOK());
- WaitFor(Writer->Close())
- .ThrowOnError();
- auto whole = ReadAll(Reader, false);
- EXPECT_EQ(message, TString(whole.Begin(), whole.End()));
-}
-
-TEST_F(TPipeReadWriteTest, ReadWrite)
-{
- TString text("Hello cruel world!\n");
- auto buffer = TSharedRef::FromString(text);
- Writer->Write(buffer).Get();
- auto errorsOnClose = Writer->Close();
-
- auto textFromPipe = ReadAll(Reader, false);
-
- auto error = errorsOnClose.Get();
- EXPECT_TRUE(error.IsOK()) << error.GetMessage();
- EXPECT_EQ(text, TString(textFromPipe.Begin(), textFromPipe.End()));
-}
-
-TEST_F(TNamedPipeReadWriteTest, ReadWrite)
-{
- TString text("Hello cruel world!\n");
- auto buffer = TSharedRef::FromString(text);
- Writer->Write(buffer).Get();
- auto errorsOnClose = Writer->Close();
-
- auto textFromPipe = ReadAll(Reader, false);
-
- auto error = errorsOnClose.Get();
- EXPECT_TRUE(error.IsOK()) << error.GetMessage();
- EXPECT_EQ(text, TString(textFromPipe.Begin(), textFromPipe.End()));
-}
-
-TEST_F(TNamedPipeReadWriteTest, CapacityJustWorks)
-{
- SetUpWithCapacity(SmallPipeCapacity);
-
- TString text(5, 'a');
- text.push_back('\n');
- auto writeBuffer = TSharedRef::FromString(text);
-
- auto writeFuture = Writer->Write(writeBuffer);
- EXPECT_TRUE(writeFuture.Get().IsOK());
-
- auto readBuffer = TSharedMutableRef::Allocate(5000, {.InitializeStorage = false});
- auto readResult = Reader->Read(readBuffer).Get();
-
- EXPECT_EQ(text, TString(readBuffer.Begin(), readResult.Value()));
-}
-
-TEST_F(TNamedPipeReadWriteTest, CapacityOverflow)
-{
- SetUpWithCapacity(SmallPipeCapacity);
- auto readerQueue = New<NConcurrency::TActionQueue>("Reader");
-
- TString text(5000, 'a');
- text.push_back('\n');
- auto writeBuffer = TSharedRef::FromString(text);
- auto writeFuture = Writer->Write(writeBuffer);
-
- TDelayedExecutor::WaitForDuration(TDuration::Seconds(1));
- EXPECT_FALSE(writeFuture.IsSet());
-
- auto readFuture = BIND([&] {
- auto readBuffer = TSharedMutableRef::Allocate(6000, {.InitializeStorage = false});
- auto readResult = Reader->Read(readBuffer).Get();
-
- EXPECT_TRUE(readResult.IsOK());
- EXPECT_EQ(text.substr(0, 4096), TString(readBuffer.Begin(), readResult.Value()));
- })
- .AsyncVia(readerQueue->GetInvoker())
- .Run();
-
- EXPECT_TRUE(readFuture.Get().IsOK());
- EXPECT_TRUE(writeFuture.Get().IsOK());
-}
-
-TEST_F(TNamedPipeReadWriteTest, CapacityDontDiscardSurplus)
-{
- SetUpWithCapacity(SmallPipeCapacity);
- auto readerQueue = New<NConcurrency::TActionQueue>("Reader");
- auto writerQueue = New<NConcurrency::TActionQueue>("Writer");
-
- TString text(5000, 'a');
- text.push_back('\n');
-
- auto writeFuture = BIND(&WriteAll, Writer, text.data(), text.size(), text.size())
- .AsyncVia(writerQueue->GetInvoker())
- .Run();
-
- TDelayedExecutor::WaitForDuration(TDuration::Seconds(1));
- EXPECT_FALSE(writeFuture.IsSet());
-
- auto readFuture = BIND(&ReadAll, Reader, false)
- .AsyncVia(readerQueue->GetInvoker())
- .Run();
-
- auto readResult = readFuture.Get().ValueOrThrow();
- EXPECT_EQ(text, TString(readResult.Begin(), readResult.End()));
-
- EXPECT_TRUE(writeFuture.Get().IsOK());
-}
-
-#if defined(_linux_)
-
-TEST_F(TNamedPipeReadWriteTest, DeliveryFencedWriteJustWorks)
-{
- SetUpWithDeliveryFence();
-
- TString text("aabbb");
- auto writeBuffer = TSharedRef::FromString(text);
- auto writeFuture = Writer->Write(writeBuffer);
-
- auto readBuffer = TSharedMutableRef::Allocate(2, {.InitializeStorage = false});
- auto readResult = Reader->Read(readBuffer).Get();
- EXPECT_EQ(TString("aa"), TString(readBuffer.Begin(), readResult.Value()));
-
- EXPECT_FALSE(writeFuture.IsSet());
-
- readBuffer = TSharedMutableRef::Allocate(10, {.InitializeStorage = false});
- readResult = Reader->Read(readBuffer).Get();
- EXPECT_EQ(TString("bbb"), TString(readBuffer.Begin(), readResult.Value()));
-
- // Future is set only after the entire buffer is read.
- EXPECT_TRUE(writeFuture.Get().IsOK());
-}
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TPipeBigReadWriteTest
- : public TPipeReadWriteTest
- , public ::testing::WithParamInterface<std::pair<size_t, size_t>>
-{ };
-
-TEST_P(TPipeBigReadWriteTest, RealReadWrite)
-{
- size_t dataSize, blockSize;
- std::tie(dataSize, blockSize) = GetParam();
-
- auto queue = New<NConcurrency::TActionQueue>();
-
- std::vector<char> data(dataSize, 'a');
-
- YT_UNUSED_FUTURE(BIND([&] {
- auto dice = std::bind(
- std::uniform_int_distribution<int>(0, 127),
- std::default_random_engine());
- for (size_t i = 0; i < data.size(); ++i) {
- data[i] = dice();
- }
- })
- .AsyncVia(queue->GetInvoker()).Run());
-
- auto writeError = BIND(&WriteAll, Writer, data.data(), data.size(), blockSize)
- .AsyncVia(queue->GetInvoker())
- .Run();
- auto readFromPipe = BIND(&ReadAll, Reader, true)
- .AsyncVia(queue->GetInvoker())
- .Run();
-
- auto textFromPipe = readFromPipe.Get().ValueOrThrow();
- EXPECT_EQ(data.size(), textFromPipe.Size());
- auto result = std::mismatch(textFromPipe.Begin(), textFromPipe.End(), data.begin());
- EXPECT_TRUE(std::equal(textFromPipe.Begin(), textFromPipe.End(), data.begin())) <<
- (result.first - textFromPipe.Begin()) << " " << (int)(*result.first);
-}
-
-INSTANTIATE_TEST_SUITE_P(
- ValueParametrized,
- TPipeBigReadWriteTest,
- ::testing::Values(
- std::pair(2000 * 4096, 4096),
- std::pair(100 * 4096, 10000),
- std::pair(100 * 4096, 100),
- std::pair(100, 4096)));
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NPipes
diff --git a/yt/yt/library/process/unittests/process_ut.cpp b/yt/yt/library/process/unittests/process_ut.cpp
deleted file mode 100644
index 61508c487f..0000000000
--- a/yt/yt/library/process/unittests/process_ut.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-#include <yt/yt/library/process/process.h>
-
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/actions/bind.h>
-
-#include <yt/yt/core/concurrency/action_queue.h>
-#include <yt/yt/core/concurrency/delayed_executor.h>
-#include <yt/yt/core/concurrency/scheduler.h>
-
-#include <yt/yt/core/net/connection.h>
-
-#include <library/cpp/yt/system/handle_eintr.h>
-
-namespace NYT {
-namespace {
-
-using namespace NConcurrency;
-
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(_unix_) and not defined(_asan_enabled_)
-
-TEST(TProcessTest, Basic)
-{
- auto p = New<TSimpleProcess>("/bin/ls");
- TFuture<void> finished;
-
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_TRUE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-}
-
-// NB: We cannot rely on 'ls' and 'sleep' in arcadia tests.
-TEST(TProcessTest, RunFromPathEnv)
-{
- auto p = New<TSimpleProcess>("/bin/ls", false);
- TFuture<void> finished;
-
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_TRUE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, PollDuration)
-{
- auto p = New<TSimpleProcess>("/bin/sleep", true, TDuration::MilliSeconds(1));
- p->AddArgument("0.1");
-
- auto error = WaitFor(p->Spawn());
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, InvalidPath)
-{
- auto p = New<TSimpleProcess>("/some/bad/path/binary");
-
- TFuture<void> finished;
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_FALSE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_FALSE(p->IsFinished());
- EXPECT_FALSE(error.IsOK());
-}
-
-TEST(TProcessTest, StdOut)
-{
- auto p = New<TSimpleProcess>("/bin/date");
-
- auto outStream = p->GetStdOutReader();
- TFuture<void> finished;
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_TRUE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-
- auto buffer = TSharedMutableRef::Allocate(4_KB, {.InitializeStorage = false});
- auto future = outStream->Read(buffer);
- auto result = WaitFor(future);
- size_t sz = result.ValueOrThrow();
- EXPECT_TRUE(sz > 0);
-}
-
-TEST(TSimpleProcess, GetCommandLine1)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- EXPECT_EQ("/bin/bash", p->GetCommandLine());
- p->AddArgument("-c");
- EXPECT_EQ("/bin/bash -c", p->GetCommandLine());
- p->AddArgument("exit 0");
- EXPECT_EQ("/bin/bash -c \"exit 0\"", p->GetCommandLine());
-}
-
-TEST(TProcessBase, GetCommandLine2)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- EXPECT_EQ("/bin/bash", p->GetCommandLine());
- p->AddArgument("-c");
- EXPECT_EQ("/bin/bash -c", p->GetCommandLine());
- p->AddArgument("\"quoted\"");
- EXPECT_EQ("/bin/bash -c \"\\\"quoted\\\"\"", p->GetCommandLine());
-}
-
-TEST(TProcessTest, ProcessReturnCode0)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("exit 0");
-
- TFuture<void> finished;
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_TRUE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, ProcessReturnCode123)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("exit 123");
-
- TFuture<void> finished;
- ASSERT_NO_THROW(finished = p->Spawn());
- ASSERT_TRUE(p->IsStarted());
- auto error = WaitFor(finished);
- EXPECT_EQ(EProcessErrorCode::NonZeroExitCode, error.GetCode());
- EXPECT_EQ(123, error.Attributes().Get<int>("exit_code"));
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, Params1)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("if test 3 -gt 1; then exit 7; fi");
-
- auto error = WaitFor(p->Spawn());
- EXPECT_FALSE(error.IsOK());
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, Params2)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("if test 1 -gt 3; then exit 7; fi");
-
- auto error = WaitFor(p->Spawn());
- EXPECT_TRUE(error.IsOK()) << ToString(error);
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, InheritEnvironment)
-{
- const char* name = "SPAWN_TEST_ENV_VAR";
- const char* value = "42";
- setenv(name, value, 1);
-
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("if test $SPAWN_TEST_ENV_VAR = 42; then exit 7; fi");
-
- auto error = WaitFor(p->Spawn());
- EXPECT_FALSE(error.IsOK());
- EXPECT_TRUE(p->IsFinished());
-
- unsetenv(name);
-}
-
-TEST(TProcessTest, Kill)
-{
- auto p = New<TSimpleProcess>("/bin/sleep");
- p->AddArgument("5");
-
- auto finished = p->Spawn();
-
- NConcurrency::TDelayedExecutor::Submit(
- BIND([&] {
- p->Kill(SIGKILL);
- }),
- TDuration::MilliSeconds(100));
-
- auto error = WaitFor(finished);
- EXPECT_FALSE(error.IsOK());
- EXPECT_TRUE(p->IsFinished());
-}
-
-TEST(TProcessTest, KillFinished)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("true");
-
- auto finished = p->Spawn();
-
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK());
-
- p->Kill(SIGKILL);
-}
-
-TEST(TProcessTest, KillZombie)
-{
- auto p = New<TSimpleProcess>("/bin/bash");
- p->AddArgument("-c");
- p->AddArgument("/bin/sleep 1; /bin/true");
-
- auto finished = p->Spawn();
-
- siginfo_t infop;
- auto res = HandleEintr(::waitid, P_PID, p->GetProcessId(), &infop, WEXITED | WNOWAIT);
-
- if (res == 0) {
- EXPECT_EQ(p->GetProcessId(), infop.si_pid);
- } else {
- // NB(arkady-e1ppa): Sometimes child process will run
- // just fine and yet will be invisible to waitid
- // on some platforms.
- // Cause of this is still unknown.
- EXPECT_EQ(errno, ECHILD);
- }
-
- p->Kill(SIGKILL);
- auto error = WaitFor(finished);
- EXPECT_TRUE(error.IsOK())
- << ToString(error);
-}
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT
diff --git a/yt/yt/library/process/unittests/subprocess_ut.cpp b/yt/yt/library/process/unittests/subprocess_ut.cpp
deleted file mode 100644
index ff7cf7aa08..0000000000
--- a/yt/yt/library/process/unittests/subprocess_ut.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/actions/future.h>
-
-#include <yt/yt/core/concurrency/action_queue.h>
-
-#include <yt/yt/library/process/subprocess.h>
-
-namespace NYT {
-namespace {
-
-using namespace NConcurrency;
-
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(_unix_) and not defined(_asan_enabled_)
-
-TEST(TSubprocessTest, Basic)
-{
- TSubprocess subprocess("/bin/bash");
-
- subprocess.AddArgument("-c");
- subprocess.AddArgument("true");
-
- auto result = subprocess.Execute();
- EXPECT_TRUE(result.Status.IsOK());
-}
-
-TEST(TSubprocessTest, PipeOutput)
-{
- TSubprocess subprocess("/bin/echo");
-
- subprocess.AddArgument("hello");
-
- auto result = subprocess.Execute();
- EXPECT_TRUE(result.Status.IsOK());
- TString output(result.Output.Begin(), result.Output.End());
- EXPECT_TRUE(output == "hello\n") << output;
-}
-
-TEST(TSubprocessTest, PipeStdin)
-{
- auto queue = New<TActionQueue>();
-
- BIND([] {
- TSubprocess subprocess("/bin/cat");
- subprocess.AddArgument("-");
-
- auto input = TString("TEST test TEST");
- auto inputRef = TSharedRef::FromString(input);
- auto result = subprocess.Execute(inputRef);
- EXPECT_TRUE(result.Status.IsOK());
-
- TString output(result.Output.Begin(), result.Output.End());
- EXPECT_EQ(input, output);
- }).AsyncVia(queue->GetInvoker()).Run().Get().ThrowOnError();
-}
-
-TEST(TSubprocessTest, PipeBigOutput)
-{
- auto queue = New<TActionQueue>();
-
- auto result = BIND([] {
- TSubprocess subprocess("/bin/bash");
-
- subprocess.AddArgument("-c");
- subprocess.AddArgument("for i in `/usr/bin/seq 100000`; do echo hello; done; echo world");
-
- auto result = subprocess.Execute();
- return result.Status.IsOK();
- }).AsyncVia(queue->GetInvoker()).Run().Get().Value();
-
- EXPECT_TRUE(result);
-}
-
-TEST(TSubprocessTest, PipeBigError)
-{
- auto queue = New<TActionQueue>();
-
- auto result = BIND([] {
- TSubprocess subprocess("/bin/bash");
-
- subprocess.AddArgument("-c");
- subprocess.AddArgument("for i in `/usr/bin/seq 100000`; do echo hello 1>&2; done; echo world");
-
- auto result = subprocess.Execute();
- return result;
- }).AsyncVia(queue->GetInvoker()).Run().Get().Value();
-
- EXPECT_TRUE(result.Status.IsOK());
- EXPECT_EQ(6*100000, std::ssize(result.Error));
-}
-
-TEST(TSubprocessTest, BinaryNotFound)
-{
- auto queue = New<TActionQueue>();
-
- auto result = BIND([] {
- TSubprocess subprocess("does-not-exist");
- return subprocess.Execute();
- }).AsyncVia(queue->GetInvoker()).Run().Get().Value();
-
- EXPECT_FALSE(result.Status.IsOK());
-}
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT
diff --git a/yt/yt/library/process/unittests/ya.make b/yt/yt/library/process/unittests/ya.make
deleted file mode 100644
index 149d9eee1f..0000000000
--- a/yt/yt/library/process/unittests/ya.make
+++ /dev/null
@@ -1,22 +0,0 @@
-GTEST(unittester-library-process)
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- pipes_ut.cpp
- process_ut.cpp
- subprocess_ut.cpp
-)
-
-INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
-
-PEERDIR(
- yt/yt/build
- yt/yt/core
- yt/yt/core/test_framework
- yt/yt/library/process
-)
-
-SIZE(MEDIUM)
-
-END()
diff --git a/yt/yt/library/process/ya.make b/yt/yt/library/process/ya.make
index 79763c7267..6b3ea41ca2 100644
--- a/yt/yt/library/process/ya.make
+++ b/yt/yt/library/process/ya.make
@@ -3,6 +3,8 @@ LIBRARY()
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
SRCS(
+ config.cpp
+ GLOBAL configure_io_dispatcher.cpp
io_dispatcher.cpp
pipe.cpp
process.cpp
diff --git a/yt/yt/library/profiling/resource_tracker/configure_resource_tracker.cpp b/yt/yt/library/profiling/resource_tracker/configure_resource_tracker.cpp
new file mode 100644
index 0000000000..4b25b3f6f4
--- /dev/null
+++ b/yt/yt/library/profiling/resource_tracker/configure_resource_tracker.cpp
@@ -0,0 +1,28 @@
+#include "resource_tracker.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NProfiling {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TResourceTrackerConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void ConfigureSingleton(const TResourceTrackerConfigPtr& config)
+{
+ TResourceTracker::Configure(config);
+}
+
+YT_DEFINE_CONFIGURABLE_SINGLETON(
+ "resource_tracker",
+ TResourceTrackerConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NProfiling
diff --git a/yt/yt/library/profiling/resource_tracker/public.h b/yt/yt/library/profiling/resource_tracker/public.h
index 36786770fa..febd29d563 100644
--- a/yt/yt/library/profiling/resource_tracker/public.h
+++ b/yt/yt/library/profiling/resource_tracker/public.h
@@ -1,6 +1,8 @@
#pragma once
-#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
+#include <library/cpp/yt/memory/ref_counted.h>
namespace NYT::NProfiling {
@@ -8,6 +10,8 @@ namespace NYT::NProfiling {
DECLARE_REFCOUNTED_STRUCT(TResourceTrackerConfig)
+YT_DECLARE_CONFIGURABLE_SINGLETON(TResourceTrackerConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NProfiling
diff --git a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
index 1c45abfbbb..138f89434a 100644
--- a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
+++ b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
@@ -33,7 +33,6 @@ namespace NYT::NProfiling {
////////////////////////////////////////////////////////////////////////////////
using namespace NYPath;
-using namespace NYTree;
using namespace NProfiling;
using namespace NConcurrency;
diff --git a/yt/yt/library/profiling/resource_tracker/ya.make b/yt/yt/library/profiling/resource_tracker/ya.make
index 62287a3fb7..009f21eaa4 100644
--- a/yt/yt/library/profiling/resource_tracker/ya.make
+++ b/yt/yt/library/profiling/resource_tracker/ya.make
@@ -4,6 +4,7 @@ INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
SRCS(
config.cpp
+ GLOBAL configure_resource_tracker.cpp
resource_tracker.cpp
)
diff --git a/yt/yt/library/profiling/solomon/config.cpp b/yt/yt/library/profiling/solomon/config.cpp
index e714548804..7b9666ab2e 100644
--- a/yt/yt/library/profiling/solomon/config.cpp
+++ b/yt/yt/library/profiling/solomon/config.cpp
@@ -45,6 +45,9 @@ void TSolomonExporterConfig::Register(TRegistrar registrar)
registrar.Parameter("convert_counters_to_delta_gauge", &TThis::ConvertCountersToDeltaGauge)
.Default(false);
+ registrar.Parameter("enable_histogram_compat", &TThis::EnableHistogramCompat)
+ .Default(false);
+
registrar.Parameter("export_summary", &TThis::ExportSummary)
.Default(false);
registrar.Parameter("export_summary_as_max", &TThis::ExportSummaryAsMax)
diff --git a/yt/yt/library/profiling/solomon/config.h b/yt/yt/library/profiling/solomon/config.h
index 42b5b4548e..0ff84c96d2 100644
--- a/yt/yt/library/profiling/solomon/config.h
+++ b/yt/yt/library/profiling/solomon/config.h
@@ -41,6 +41,7 @@ struct TSolomonExporterConfig
bool ConvertCountersToRateForSolomon;
bool RenameConvertedCounters;
bool ConvertCountersToDeltaGauge;
+ bool EnableHistogramCompat;
bool ExportSummary;
bool ExportSummaryAsMax;
diff --git a/yt/yt/library/profiling/solomon/exporter.cpp b/yt/yt/library/profiling/solomon/exporter.cpp
index aee542660a..c06a7e4ac7 100644
--- a/yt/yt/library/profiling/solomon/exporter.cpp
+++ b/yt/yt/library/profiling/solomon/exporter.cpp
@@ -636,6 +636,9 @@ void TSolomonExporter::DoHandleShard(
if (Config_->ConvertCountersToDeltaGauge && outputEncodingContext.IsSolomonPull) {
options.ConvertCountersToDeltaGauge = true;
}
+ if (Config_->EnableHistogramCompat && outputEncodingContext.IsSolomonPull) {
+ options.EnableHistogramCompat = true;
+ }
options.EnableSolomonAggregationWorkaround = outputEncodingContext.IsSolomonPull;
options.Times = readWindow;
diff --git a/yt/yt/library/profiling/solomon/helpers.cpp b/yt/yt/library/profiling/solomon/helpers.cpp
index 056daaa330..7f4be734a5 100644
--- a/yt/yt/library/profiling/solomon/helpers.cpp
+++ b/yt/yt/library/profiling/solomon/helpers.cpp
@@ -1,8 +1,13 @@
#include "helpers.h"
+#include "percpu.h"
#include "private.h"
+#include "producer.h"
+#include "sensor_set.h"
#include <yt/yt/core/http/http.h>
+#include <yt/yt/core/misc/ref_counted_tracker.h>
+
#include <library/cpp/monlib/encode/json/json.h>
#include <library/cpp/monlib/encode/spack/spack_v1.h>
#include <library/cpp/monlib/encode/prometheus/prometheus.h>
@@ -73,6 +78,38 @@ TOutputEncodingContext CreateOutputEncodingContextFromHeaders(const THeadersPtr&
return context;
}
+i64 GetCountersBytesAlive()
+{
+ auto* tracker = TRefCountedTracker::Get();
+ i64 usage = 0;
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSimpleCounter>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TPerCpuCounter>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TCounterState>());
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSimpleTimeCounter>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TPerCpuTimeCounter>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TTimeCounterState>());
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSimpleGauge>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TPerCpuGauge>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TGaugeState>());
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSimpleSummary<double>>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TPerCpuSummary<double>>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSimpleSummary<TDuration>>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TPerCpuSummary<TDuration>>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TSummaryState>());
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TProducerState>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<THistogram>());
+
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<TTimerSummaryState>());
+ usage += tracker->GetBytesAlive(GetRefCountedTypeKey<THistogramState>());
+
+ return usage;
+}
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NProfiling
diff --git a/yt/yt/library/profiling/solomon/helpers.h b/yt/yt/library/profiling/solomon/helpers.h
index 4713fd20b7..fd2c841b3f 100644
--- a/yt/yt/library/profiling/solomon/helpers.h
+++ b/yt/yt/library/profiling/solomon/helpers.h
@@ -27,6 +27,8 @@ void FillResponseHeaders(const TOutputEncodingContext& outputEncodingContext, co
//! Creates output encoder according to request headers.
TOutputEncodingContext CreateOutputEncodingContextFromHeaders(const NHttp::THeadersPtr& headers);
+i64 GetCountersBytesAlive();
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NProfiling
diff --git a/yt/yt/library/program/config.cpp b/yt/yt/library/program/config.cpp
index 371c7ced70..762feea0f2 100644
--- a/yt/yt/library/program/config.cpp
+++ b/yt/yt/library/program/config.cpp
@@ -18,67 +18,6 @@ void THeapProfilerConfig::Register(TRegistrar registrar)
////////////////////////////////////////////////////////////////////////////////
-void TSingletonsConfig::Register(TRegistrar registrar)
-{
- registrar.Parameter("fiber_manager", &TThis::FiberManager)
- .DefaultNew();
- registrar.Parameter("address_resolver", &TThis::AddressResolver)
- .DefaultNew();
- registrar.Parameter("tcp_dispatcher", &TThis::TcpDispatcher)
- .DefaultNew();
- registrar.Parameter("io_dispatcher", &TThis::IODispatcher)
- .DefaultNew();
- registrar.Parameter("rpc_dispatcher", &TThis::RpcDispatcher)
- .DefaultNew();
- registrar.Parameter("grpc_dispatcher", &TThis::GrpcDispatcher)
- .DefaultNew();
- registrar.Parameter("yp_service_discovery", &TThis::YPServiceDiscovery)
- .DefaultNew();
- registrar.Parameter("logging", &TThis::Logging)
- .DefaultCtor([] { return NLogging::TLogManagerConfig::CreateDefault(); })
- .ResetOnLoad();
- registrar.Parameter("jaeger", &TThis::Jaeger)
- .DefaultNew();
- registrar.Parameter("tcmalloc", &TThis::TCMalloc)
- .DefaultNew();
- registrar.Parameter("stockpile", &TThis::Stockpile)
- .DefaultNew();
- registrar.Parameter("enable_ref_counted_tracker_profiling", &TThis::EnableRefCountedTrackerProfiling)
- .Default(true);
- registrar.Parameter("resource_tracker", &TThis::ResourceTracker)
- .DefaultNew();
- registrar.Parameter("heap_profiler", &TThis::HeapProfiler)
- .DefaultNew();
- registrar.Parameter("protobuf_interop", &TThis::ProtobufInterop)
- .DefaultNew();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TSingletonsDynamicConfig::Register(TRegistrar registrar)
-{
- registrar.Parameter("fiber_manager", &TThis::FiberManager)
- .DefaultNew();
- registrar.Parameter("tcp_dispatcher", &TThis::TcpDispatcher)
- .DefaultNew();
- registrar.Parameter("io_dispatcher", &TThis::IODispatcher)
- .Optional();
- registrar.Parameter("rpc_dispatcher", &TThis::RpcDispatcher)
- .DefaultNew();
- registrar.Parameter("logging", &TThis::Logging)
- .DefaultNew();
- registrar.Parameter("jaeger", &TThis::Jaeger)
- .DefaultNew();
- registrar.Parameter("tcmalloc", &TThis::TCMalloc)
- .Optional();
- registrar.Parameter("stockpile", &TThis::Stockpile)
- .DefaultNew();
- registrar.Parameter("protobuf_interop", &TThis::ProtobufInterop)
- .DefaultNew();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
void WarnForUnrecognizedOptionsImpl(
const NLogging::TLogger& logger,
const IMapNodePtr& unrecognized)
diff --git a/yt/yt/library/program/config.h b/yt/yt/library/program/config.h
index 1b6e0ffd64..38f6eb65ee 100644
--- a/yt/yt/library/program/config.h
+++ b/yt/yt/library/program/config.h
@@ -2,32 +2,9 @@
#include "public.h"
-#include <yt/yt/core/concurrency/config.h>
-
#include <yt/yt/core/ytree/yson_struct.h>
-#include <yt/yt/core/net/config.h>
-
-#include <yt/yt/core/rpc/config.h>
-#include <yt/yt/core/rpc/grpc/config.h>
-
-#include <yt/yt/core/bus/tcp/config.h>
-
-#include <yt/yt/core/logging/config.h>
-
-#include <yt/yt/core/service_discovery/yp/config.h>
-
-#include <yt/yt/core/yson/config.h>
-
-#include <yt/yt/library/process/io_dispatcher.h>
-
-#include <yt/yt/library/tracing/jaeger/tracer.h>
-
-#include <yt/yt/library/profiling/resource_tracker/config.h>
-
-#include <yt/yt/library/tcmalloc/config.h>
-
-#include <yt/yt/library/stockpile/config.h>
+#include <yt/yt/core/misc/configurable_singleton_def.h>
namespace NYT {
@@ -53,58 +30,6 @@ DEFINE_REFCOUNTED_TYPE(THeapProfilerConfig)
////////////////////////////////////////////////////////////////////////////////
-class TSingletonsConfig
- : public virtual NYTree::TYsonStruct
-{
-public:
- NConcurrency::TFiberManagerConfigPtr FiberManager;
- NNet::TAddressResolverConfigPtr AddressResolver;
- NBus::TTcpDispatcherConfigPtr TcpDispatcher;
- NPipes::TIODispatcherConfigPtr IODispatcher;
- NRpc::TDispatcherConfigPtr RpcDispatcher;
- NRpc::NGrpc::TDispatcherConfigPtr GrpcDispatcher;
- NServiceDiscovery::NYP::TServiceDiscoveryConfigPtr YPServiceDiscovery;
- NLogging::TLogManagerConfigPtr Logging;
- NTracing::TJaegerTracerConfigPtr Jaeger;
- NTCMalloc::TTCMallocConfigPtr TCMalloc;
- TStockpileConfigPtr Stockpile;
- bool EnableRefCountedTrackerProfiling;
- NProfiling::TResourceTrackerConfigPtr ResourceTracker;
- THeapProfilerConfigPtr HeapProfiler;
- NYson::TProtobufInteropConfigPtr ProtobufInterop;
-
- REGISTER_YSON_STRUCT(TSingletonsConfig);
-
- static void Register(TRegistrar registrar);
-};
-
-DEFINE_REFCOUNTED_TYPE(TSingletonsConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TSingletonsDynamicConfig
- : public virtual NYTree::TYsonStruct
-{
-public:
- NConcurrency::TFiberManagerDynamicConfigPtr FiberManager;
- NBus::TTcpDispatcherDynamicConfigPtr TcpDispatcher;
- NPipes::TIODispatcherConfigPtr IODispatcher;
- NRpc::TDispatcherDynamicConfigPtr RpcDispatcher;
- NLogging::TLogManagerDynamicConfigPtr Logging;
- NTracing::TJaegerTracerDynamicConfigPtr Jaeger;
- NTCMalloc::TTCMallocConfigPtr TCMalloc;
- TStockpileDynamicConfigPtr Stockpile;
- NYson::TProtobufInteropDynamicConfigPtr ProtobufInterop;
-
- REGISTER_YSON_STRUCT(TSingletonsDynamicConfig);
-
- static void Register(TRegistrar registrar);
-};
-
-DEFINE_REFCOUNTED_TYPE(TSingletonsDynamicConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
// NB: These functions should not be called from bootstrap
// config validator since logger is not set up yet.
void WarnForUnrecognizedOptions(
diff --git a/yt/yt/library/program/helpers.cpp b/yt/yt/library/program/helpers.cpp
index b46bdd9786..1911b327b5 100644
--- a/yt/yt/library/program/helpers.cpp
+++ b/yt/yt/library/program/helpers.cpp
@@ -1,124 +1,37 @@
#include "helpers.h"
#include "config.h"
-#include "private.h"
-
-#include <yt/yt/core/misc/lazy_ptr.h>
-#include <yt/yt/core/misc/ref_counted_tracker.h>
-#include <yt/yt/core/misc/ref_counted_tracker_profiler.h>
-
-#include <yt/yt/core/bus/tcp/dispatcher.h>
-
-#include <yt/yt/core/concurrency/fiber_manager.h>
-
-#include <yt/yt/library/tracing/jaeger/tracer.h>
#include <yt/yt/library/profiling/perf/event_counter_profiler.h>
-#include <yt/yt/library/profiling/resource_tracker/resource_tracker.h>
-
-#include <yt/yt/library/tcmalloc/tcmalloc_manager.h>
+#include <yt/yt/core/misc/ref_counted_tracker_profiler.h>
#include <yt/yt/core/logging/log_manager.h>
-#include <yt/yt/core/concurrency/execution_stack.h>
-#include <yt/yt/core/concurrency/fiber_scheduler_thread.h>
-#include <yt/yt/core/concurrency/periodic_executor.h>
-
#include <yt/yt/core/net/address.h>
-#include <yt/yt/core/yson/protobuf_interop.h>
-
-#include <yt/yt/core/rpc/dispatcher.h>
-#include <yt/yt/core/rpc/grpc/dispatcher.h>
-
-#include <yt/yt/core/service_discovery/yp/service_discovery.h>
-
-#include <library/cpp/yt/memory/atomic_intrusive_ptr.h>
-
-#include <util/string/split.h>
-#include <util/system/thread.h>
-
-#include <mutex>
-#include <thread>
-
namespace NYT {
-using namespace NConcurrency;
-using namespace NThreading;
-using namespace NTCMalloc;
-
////////////////////////////////////////////////////////////////////////////////
void ConfigureSingletons(const TSingletonsConfigPtr& config)
{
- TFiberManager::Configure(config->FiberManager);
+ TSingletonManager::Configure(config);
+ // TODO(babenko): move to server program base
NLogging::TLogManager::Get()->EnableReopenOnSighup();
- if (!NLogging::TLogManager::Get()->IsConfiguredFromEnv()) {
- NLogging::TLogManager::Get()->Configure(config->Logging);
- }
- NNet::TAddressResolver::Get()->Configure(config->AddressResolver);
// By default, server components must have a reasonable FQDN.
// Failure to do so may result in issues like YT-4561.
+ // TODO(babenko): move to server program base
NNet::TAddressResolver::Get()->EnsureLocalHostName();
- NBus::TTcpDispatcher::Get()->Configure(config->TcpDispatcher);
-
- NPipes::TIODispatcher::Get()->Configure(config->IODispatcher);
-
- NRpc::TDispatcher::Get()->Configure(config->RpcDispatcher);
-
- NRpc::NGrpc::TDispatcher::Get()->Configure(config->GrpcDispatcher);
-
- NRpc::TDispatcher::Get()->SetServiceDiscovery(
- NServiceDiscovery::NYP::CreateServiceDiscovery(config->YPServiceDiscovery));
-
- NTracing::SetGlobalTracer(New<NTracing::TJaegerTracer>(config->Jaeger));
-
+ // TODO(babenko): move to server program base
NProfiling::EnablePerfEventCounterProfiling();
-
- NTCMalloc::TTCMallocManager::Configure(config->TCMalloc);
-
- TStockpileManager::Reconfigure(*config->Stockpile);
-
- if (config->EnableRefCountedTrackerProfiling) {
- EnableRefCountedTrackerProfiling();
- }
-
- NProfiling::TResourceTracker::Configure(config->ResourceTracker);
-
- NYson::SetProtobufInteropConfig(config->ProtobufInterop);
}
-void ReconfigureSingletons(const TSingletonsConfigPtr& config, const TSingletonsDynamicConfigPtr& dynamicConfig)
+void ReconfigureSingletons(const TSingletonsDynamicConfigPtr& dynamicConfig)
{
- TFiberManager::Configure(config->FiberManager->ApplyDynamic(dynamicConfig->FiberManager));
-
- if (!NLogging::TLogManager::Get()->IsConfiguredFromEnv()) {
- NLogging::TLogManager::Get()->Configure(
- config->Logging->ApplyDynamic(dynamicConfig->Logging),
- /*sync*/ false);
- }
-
- auto tracer = NTracing::GetGlobalTracer();
- if (auto jaeger = DynamicPointerCast<NTracing::TJaegerTracer>(tracer); jaeger) {
- jaeger->Configure(config->Jaeger->ApplyDynamic(dynamicConfig->Jaeger));
- }
-
- NBus::TTcpDispatcher::Get()->Configure(config->TcpDispatcher->ApplyDynamic(dynamicConfig->TcpDispatcher));
-
- NPipes::TIODispatcher::Get()->Configure(dynamicConfig->IODispatcher ? dynamicConfig->IODispatcher : config->IODispatcher);
-
- NRpc::TDispatcher::Get()->Configure(config->RpcDispatcher->ApplyDynamic(dynamicConfig->RpcDispatcher));
-
- NTCMalloc::TTCMallocManager::Configure(dynamicConfig->TCMalloc
- ? config->TCMalloc->ApplyDynamic(dynamicConfig->TCMalloc)
- : config->TCMalloc);
-
- TStockpileManager::Reconfigure(*config->Stockpile->ApplyDynamic(dynamicConfig->Stockpile));
-
- NYson::SetProtobufInteropConfig(config->ProtobufInterop->ApplyDynamic(dynamicConfig->ProtobufInterop));
+ TSingletonManager::Reconfigure(dynamicConfig);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/program/helpers.h b/yt/yt/library/program/helpers.h
index 7cbf696109..7d0842179d 100644
--- a/yt/yt/library/program/helpers.h
+++ b/yt/yt/library/program/helpers.h
@@ -7,9 +7,7 @@ namespace NYT {
////////////////////////////////////////////////////////////////////////////////
void ConfigureSingletons(const TSingletonsConfigPtr& config);
-void ReconfigureSingletons(
- const TSingletonsConfigPtr& config,
- const TSingletonsDynamicConfigPtr& dynamicConfig);
+void ReconfigureSingletons(const TSingletonsDynamicConfigPtr& dynamicConfig);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/program/program.cpp b/yt/yt/library/program/program.cpp
index ff74660266..5c13a6d09c 100644
--- a/yt/yt/library/program/program.cpp
+++ b/yt/yt/library/program/program.cpp
@@ -18,8 +18,6 @@
#include <yt/yt/library/profiling/tcmalloc/profiler.h>
-#include <library/cpp/yt/stockpile/stockpile.h>
-
#include <library/cpp/yt/system/exit.h>
#include <library/cpp/yt/backtrace/absl_unwinder/absl_unwinder.h>
diff --git a/yt/yt/library/program/program.h b/yt/yt/library/program/program.h
index cd8bf61554..1f47ce93b7 100644
--- a/yt/yt/library/program/program.h
+++ b/yt/yt/library/program/program.h
@@ -2,8 +2,6 @@
#include <yt/yt/core/misc/public.h>
-#include <library/cpp/yt/stockpile/stockpile.h>
-
#include <library/cpp/getopt/last_getopt.h>
#include <yt/yt/core/yson/string.h>
diff --git a/yt/yt/library/program/public.h b/yt/yt/library/program/public.h
index 34231b1373..e45512239b 100644
--- a/yt/yt/library/program/public.h
+++ b/yt/yt/library/program/public.h
@@ -8,8 +8,6 @@ namespace NYT {
DECLARE_REFCOUNTED_CLASS(TBuildInfo)
DECLARE_REFCOUNTED_CLASS(TRpcConfig)
-DECLARE_REFCOUNTED_CLASS(TSingletonsConfig)
-DECLARE_REFCOUNTED_CLASS(TSingletonsDynamicConfig)
DECLARE_REFCOUNTED_CLASS(THeapSizeLimitConfig)
DECLARE_REFCOUNTED_CLASS(THeapProfilerConfig)
diff --git a/yt/yt/library/program/ya.make b/yt/yt/library/program/ya.make
index 5e07ac0d66..eac249a168 100644
--- a/yt/yt/library/program/ya.make
+++ b/yt/yt/library/program/ya.make
@@ -15,19 +15,12 @@ SRCS(
PEERDIR(
yt/yt/core
- yt/yt/core/service_discovery/yp
- yt/yt/library/monitoring
- yt/yt/library/oom
- yt/yt/library/profiling/solomon
yt/yt/library/profiling/tcmalloc
yt/yt/library/profiling/perf
- yt/yt/library/stockpile
yt/yt/library/ytprof
- yt/yt/library/tcmalloc
- yt/yt/library/tracing/jaeger
- library/cpp/yt/mlock
- library/cpp/yt/stockpile
+ yt/yt/library/tcmalloc # for tcmalloc singleton
library/cpp/yt/string
+ library/cpp/yt/system
library/cpp/yt/backtrace/absl_unwinder
library/cpp/getopt/small
)
diff --git a/yt/yt/library/stockpile/config.cpp b/yt/yt/library/stockpile/config.cpp
deleted file mode 100644
index 4a2fc69971..0000000000
--- a/yt/yt/library/stockpile/config.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "config.h"
-
-namespace NYT {
-
-using namespace NYTree;
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TStockpileConfig::Register(TRegistrar registrar)
-{
- registrar.BaseClassParameter("buffer_size", &TThis::BufferSize)
- .Default(DefaultBufferSize)
- .GreaterThan(0);
- registrar.BaseClassParameter("thread_count", &TThis::ThreadCount)
- .Default(DefaultThreadCount);
- registrar.BaseClassParameter("strategy", &TThis::Strategy)
- .Default(DefaultStrategy);
- registrar.BaseClassParameter("period", &TThis::Period)
- .Default(DefaultPeriod);
-}
-
-TStockpileConfigPtr TStockpileConfig::ApplyDynamic(const TStockpileDynamicConfigPtr& dynamicConfig) const
-{
- auto mergedConfig = CloneYsonStruct(MakeStrong(this));
-
- if (dynamicConfig->BufferSize) {
- mergedConfig->BufferSize = *dynamicConfig->BufferSize;
- }
- if (dynamicConfig->ThreadCount) {
- mergedConfig->ThreadCount = *dynamicConfig->ThreadCount;
- }
- if (dynamicConfig->Strategy) {
- mergedConfig->Strategy = *dynamicConfig->Strategy;
- }
- if (dynamicConfig->Period) {
- mergedConfig->Period = *dynamicConfig->Period;
- }
-
- mergedConfig->Postprocess();
- return mergedConfig;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TStockpileDynamicConfig::Register(TRegistrar registrar)
-{
- registrar.BaseClassParameter("buffer_size", &TThis::BufferSize)
- .Optional()
- .GreaterThan(0);
- registrar.BaseClassParameter("thread_count", &TThis::ThreadCount)
- .Optional()
- .GreaterThanOrEqual(0);
- registrar.BaseClassParameter("strategy", &TThis::Strategy)
- .Optional();
- registrar.BaseClassParameter("period", &TThis::Period)
- .Optional();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/stockpile/config.h b/yt/yt/library/stockpile/config.h
deleted file mode 100644
index 7d12b5bfa4..0000000000
--- a/yt/yt/library/stockpile/config.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/core/ytree/yson_struct.h>
-
-#include <library/cpp/yt/stockpile/stockpile.h>
-
-namespace NYT {
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TStockpileConfig
- : public TStockpileOptions
- , public NYTree::TYsonStruct
-{
- TStockpileConfigPtr ApplyDynamic(const TStockpileDynamicConfigPtr& dynamicConfig) const;
-
- REGISTER_YSON_STRUCT(TStockpileConfig);
-
- static void Register(TRegistrar registrar);
-};
-
-DEFINE_REFCOUNTED_TYPE(TStockpileConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TStockpileDynamicConfig
- : public NYTree::TYsonStruct
-{
- std::optional<i64> BufferSize;
- std::optional<int> ThreadCount;
- std::optional<EStockpileStrategy> Strategy;
- std::optional<TDuration> Period;
-
- REGISTER_YSON_STRUCT(TStockpileDynamicConfig);
-
- static void Register(TRegistrar registrar);
-};
-
-DEFINE_REFCOUNTED_TYPE(TStockpileDynamicConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/stockpile/public.h b/yt/yt/library/stockpile/public.h
deleted file mode 100644
index f71c1dc7e4..0000000000
--- a/yt/yt/library/stockpile/public.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-#include <yt/yt/core/misc/public.h>
-
-namespace NYT {
-
-////////////////////////////////////////////////////////////////////////////////
-
-DECLARE_REFCOUNTED_STRUCT(TStockpileConfig)
-DECLARE_REFCOUNTED_STRUCT(TStockpileDynamicConfig)
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/stockpile/ya.make b/yt/yt/library/stockpile/ya.make
deleted file mode 100644
index 9529fab0fb..0000000000
--- a/yt/yt/library/stockpile/ya.make
+++ /dev/null
@@ -1,14 +0,0 @@
-LIBRARY()
-
-INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-
-SRCS(
- config.cpp
-)
-
-PEERDIR(
- yt/yt/core
- library/cpp/yt/stockpile
-)
-
-END()
diff --git a/yt/yt/library/tcmalloc/configure_tcmalloc_manager.cpp b/yt/yt/library/tcmalloc/configure_tcmalloc_manager.cpp
new file mode 100644
index 0000000000..d5947bf185
--- /dev/null
+++ b/yt/yt/library/tcmalloc/configure_tcmalloc_manager.cpp
@@ -0,0 +1,36 @@
+#include "tcmalloc_manager.h"
+#include "config.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NTCMalloc {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TTCMallocConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void ConfigureSingleton(const TTCMallocConfigPtr& config)
+{
+ TTCMallocManager::Configure(config);
+}
+
+void ReconfigureSingleton(
+ const TTCMallocConfigPtr& config,
+ const TTCMallocConfigPtr& dynamicConfig)
+{
+ TTCMallocManager::Configure(config->ApplyDynamic(dynamicConfig));
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "tcmalloc",
+ TTCMallocConfig,
+ TTCMallocConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NTCMalloc
diff --git a/yt/yt/library/tcmalloc/public.h b/yt/yt/library/tcmalloc/public.h
index 620be42454..f6703f918c 100644
--- a/yt/yt/library/tcmalloc/public.h
+++ b/yt/yt/library/tcmalloc/public.h
@@ -1,6 +1,8 @@
#pragma once
-#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
+#include <library/cpp/yt/memory/ref_counted.h>
namespace NYT::NTCMalloc {
@@ -9,6 +11,8 @@ namespace NYT::NTCMalloc {
DECLARE_REFCOUNTED_STRUCT(TTCMallocConfig)
DECLARE_REFCOUNTED_STRUCT(THeapSizeLimitConfig)
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TTCMallocConfig, TTCMallocConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NTCMalloc
diff --git a/yt/yt/library/tcmalloc/ya.make b/yt/yt/library/tcmalloc/ya.make
index 4042a08971..35e68d60f8 100644
--- a/yt/yt/library/tcmalloc/ya.make
+++ b/yt/yt/library/tcmalloc/ya.make
@@ -5,6 +5,7 @@ INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
SRCS(
config.cpp
tcmalloc_manager.cpp
+ GLOBAL configure_tcmalloc_manager.cpp
)
PEERDIR(
diff --git a/yt/yt/library/tracing/jaeger/configure_tracer.cpp b/yt/yt/library/tracing/jaeger/configure_tracer.cpp
new file mode 100644
index 0000000000..376e8ea9a4
--- /dev/null
+++ b/yt/yt/library/tracing/jaeger/configure_tracer.cpp
@@ -0,0 +1,43 @@
+#include "tracer.h"
+
+#include <yt/yt/core/misc/configurable_singleton_def.h>
+
+namespace NYT::NTracing {
+
+using namespace NYTree;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TJaegerTracerConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void SetupSingletonConfigParameter(TYsonStructParameter<TJaegerTracerDynamicConfigPtr>& parameter)
+{
+ parameter.DefaultNew();
+}
+
+void ConfigureSingleton(const TJaegerTracerConfigPtr& config)
+{
+ SetGlobalTracer(New<TJaegerTracer>(config));
+}
+
+void ReconfigureSingleton(
+ const TJaegerTracerConfigPtr& config,
+ const TJaegerTracerDynamicConfigPtr& dynamicConfig)
+{
+ auto tracer = NTracing::GetGlobalTracer();
+ auto jaegerTracer = DynamicPointerCast<NTracing::TJaegerTracer>(tracer);
+ YT_VERIFY(jaegerTracer);
+ jaegerTracer->Configure(config->ApplyDynamic(dynamicConfig));
+}
+
+YT_DEFINE_RECONFIGURABLE_SINGLETON(
+ "jaeger",
+ TJaegerTracerConfig,
+ TJaegerTracerDynamicConfig);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NTracing
diff --git a/yt/yt/library/tracing/jaeger/public.h b/yt/yt/library/tracing/jaeger/public.h
index 1d9506cacf..9eeaead1d9 100644
--- a/yt/yt/library/tracing/jaeger/public.h
+++ b/yt/yt/library/tracing/jaeger/public.h
@@ -1,6 +1,8 @@
#pragma once
-#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/misc/configurable_singleton_decl.h>
+
+#include <library/cpp/yt/memory/ref_counted.h>
namespace NYT::NTracing {
@@ -12,6 +14,8 @@ DECLARE_REFCOUNTED_CLASS(TSamplerConfig)
DECLARE_REFCOUNTED_CLASS(TJaegerTracerDynamicConfig)
DECLARE_REFCOUNTED_CLASS(TJaegerTracerConfig)
+YT_DECLARE_RECONFIGURABLE_SINGLETON(TJaegerTracerConfig, TJaegerTracerDynamicConfig);
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NTracing
diff --git a/yt/yt/library/tracing/jaeger/tracer.cpp b/yt/yt/library/tracing/jaeger/tracer.cpp
index 090f413790..f29a8be167 100644
--- a/yt/yt/library/tracing/jaeger/tracer.cpp
+++ b/yt/yt/library/tracing/jaeger/tracer.cpp
@@ -1,3 +1,4 @@
+
#include "tracer.h"
#include "private.h"
diff --git a/yt/yt/library/tracing/jaeger/ya.make b/yt/yt/library/tracing/jaeger/ya.make
index b85e518305..14f1877294 100644
--- a/yt/yt/library/tracing/jaeger/ya.make
+++ b/yt/yt/library/tracing/jaeger/ya.make
@@ -10,8 +10,10 @@ PEERDIR(
SRCS(
model.proto
+
sampler.cpp
- GLOBAL tracer.cpp
+ tracer.cpp
+ GLOBAL configure_tracer.cpp
)
END()
diff --git a/yt/yt/library/tvm/service/unittests/ya.make b/yt/yt/library/tvm/service/unittests/ya.make
deleted file mode 100644
index 23ac522bd0..0000000000
--- a/yt/yt/library/tvm/service/unittests/ya.make
+++ /dev/null
@@ -1,19 +0,0 @@
-GTEST(unittester-library-auth_tvm)
-
-INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
-
-PEERDIR(
- yt/yt/build
-
- yt/yt/core/test_framework
-
- yt/yt/library/tvm/service
-)
-
-EXPLICIT_DATA()
-
-IF(NOT OPENSOURCE)
- INCLUDE(ya_non_opensource.inc)
-ENDIF()
-
-END()