aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornadya02 <nadya02@yandex-team.com>2023-09-07 11:57:32 +0300
committernadya02 <nadya02@yandex-team.com>2023-09-07 13:04:37 +0300
commitde934b312a042991ebf8875dd27c27bfe9dfd872 (patch)
tree860e6e2d982d770640ff0e3e5c421184a5949249
parent3c6960025c3079d4e9a4150c0b962d694571f9ae (diff)
downloadydb-de934b312a042991ebf8875dd27c27bfe9dfd872.tar.gz
YT-19430: Move formats from client to library
-rw-r--r--yt/yt/client/driver/driver.cpp9
-rw-r--r--yt/yt/client/driver/journal_commands.cpp2
-rw-r--r--yt/yt/client/driver/query_commands.cpp2
-rw-r--r--yt/yt/client/driver/queue_commands.cpp2
-rw-r--r--yt/yt/client/driver/ya.make2
-rw-r--r--yt/yt/client/formats/format.cpp570
-rw-r--r--yt/yt/client/formats/format.h88
-rw-r--r--yt/yt/client/formats/ya.make27
-rw-r--r--yt/yt/client/table_client/adapters.h2
-rw-r--r--yt/yt/client/table_client/unittests/serialization_ut.cpp3
-rw-r--r--yt/yt/client/table_client/unittests/ya.make2
-rw-r--r--yt/yt/client/unittests/check_type_compatibility_ut.cpp2
-rw-r--r--yt/yt/client/unittests/dsv_parser_ut.cpp365
-rw-r--r--yt/yt/client/unittests/dsv_writer_ut.cpp316
-rw-r--r--yt/yt/client/unittests/format_writer_ut.h36
-rw-r--r--yt/yt/client/unittests/logical_type_ut.cpp2
-rw-r--r--yt/yt/client/unittests/protobuf_format_ut.cpp4657
-rw-r--r--yt/yt/client/unittests/protobuf_format_ut.proto255
-rw-r--r--yt/yt/client/unittests/row_helpers.cpp70
-rw-r--r--yt/yt/client/unittests/row_helpers.h111
-rw-r--r--yt/yt/client/unittests/schema_ut.cpp2
-rw-r--r--yt/yt/client/unittests/schemaful_dsv_parser_ut.cpp259
-rw-r--r--yt/yt/client/unittests/schemaful_dsv_writer_ut.cpp344
-rw-r--r--yt/yt/client/unittests/skiff_format_ut.cpp3006
-rw-r--r--yt/yt/client/unittests/skiff_yson_converter_ut.cpp728
-rw-r--r--yt/yt/client/unittests/validate_logical_type_ut.cpp2
-rw-r--r--yt/yt/client/unittests/value_examples.cpp147
-rw-r--r--yt/yt/client/unittests/value_examples.h24
-rw-r--r--yt/yt/client/unittests/web_json_writer_ut.cpp1570
-rw-r--r--yt/yt/client/unittests/ya.make17
-rw-r--r--yt/yt/client/unittests/yamr_parser_ut.cpp606
-rw-r--r--yt/yt/client/unittests/yamr_writer_ut.cpp644
-rw-r--r--yt/yt/client/unittests/yamred_dsv_parser_ut.cpp187
-rw-r--r--yt/yt/client/unittests/yamred_dsv_writer_ut.cpp425
-rw-r--r--yt/yt/client/unittests/yson_helpers.cpp29
-rw-r--r--yt/yt/library/column_converters/boolean_column_converter.cpp100
-rw-r--r--yt/yt/library/column_converters/boolean_column_converter.h15
-rw-r--r--yt/yt/library/column_converters/column_converter.cpp91
-rw-r--r--yt/yt/library/column_converters/column_converter.h54
-rw-r--r--yt/yt/library/column_converters/floating_point_column_converter.cpp135
-rw-r--r--yt/yt/library/column_converters/floating_point_column_converter.h15
-rw-r--r--yt/yt/library/column_converters/helpers.cpp59
-rw-r--r--yt/yt/library/column_converters/helpers.h39
-rw-r--r--yt/yt/library/column_converters/integer_column_converter.cpp175
-rw-r--r--yt/yt/library/column_converters/integer_column_converter.h17
-rw-r--r--yt/yt/library/column_converters/null_column_converter.cpp49
-rw-r--r--yt/yt/library/column_converters/null_column_converter.h (renamed from yt/yt/client/unittests/yson_helpers.h)8
-rw-r--r--yt/yt/library/column_converters/string_column_converter.cpp375
-rw-r--r--yt/yt/library/column_converters/string_column_converter.h25
-rw-r--r--yt/yt/library/column_converters/ya.make19
-rw-r--r--yt/yt/library/formats/arrow_writer.cpp1065
-rw-r--r--yt/yt/library/formats/arrow_writer.h26
-rw-r--r--yt/yt/library/formats/dsv_parser.cpp (renamed from yt/yt/client/formats/dsv_parser.cpp)3
-rw-r--r--yt/yt/library/formats/dsv_parser.h (renamed from yt/yt/client/formats/dsv_parser.h)4
-rw-r--r--yt/yt/library/formats/dsv_writer.cpp (renamed from yt/yt/client/formats/dsv_writer.cpp)0
-rw-r--r--yt/yt/library/formats/dsv_writer.h (renamed from yt/yt/client/formats/dsv_writer.h)5
-rw-r--r--yt/yt/library/formats/escape.cpp (renamed from yt/yt/client/formats/escape.cpp)0
-rw-r--r--yt/yt/library/formats/escape.h (renamed from yt/yt/client/formats/escape.h)2
-rw-r--r--yt/yt/library/formats/format.cpp598
-rw-r--r--yt/yt/library/formats/format.h109
-rw-r--r--yt/yt/library/formats/helpers.cpp (renamed from yt/yt/client/formats/helpers.cpp)0
-rw-r--r--yt/yt/library/formats/helpers.h (renamed from yt/yt/client/formats/helpers.h)2
-rw-r--r--yt/yt/library/formats/lenval_control_constants.h (renamed from yt/yt/client/formats/lenval_control_constants.h)0
-rw-r--r--yt/yt/library/formats/private.h (renamed from yt/yt/client/formats/private.h)0
-rw-r--r--yt/yt/library/formats/protobuf.cpp (renamed from yt/yt/client/formats/protobuf.cpp)0
-rw-r--r--yt/yt/library/formats/protobuf.h (renamed from yt/yt/client/formats/protobuf.h)3
-rw-r--r--yt/yt/library/formats/protobuf_options.cpp (renamed from yt/yt/client/formats/protobuf_options.cpp)0
-rw-r--r--yt/yt/library/formats/protobuf_options.h (renamed from yt/yt/client/formats/protobuf_options.h)2
-rw-r--r--yt/yt/library/formats/protobuf_parser.cpp (renamed from yt/yt/client/formats/protobuf_parser.cpp)4
-rw-r--r--yt/yt/library/formats/protobuf_parser.h (renamed from yt/yt/client/formats/protobuf_parser.h)4
-rw-r--r--yt/yt/library/formats/protobuf_writer.cpp (renamed from yt/yt/client/formats/protobuf_writer.cpp)0
-rw-r--r--yt/yt/library/formats/protobuf_writer.h (renamed from yt/yt/client/formats/protobuf_writer.h)2
-rw-r--r--yt/yt/library/formats/schemaful_dsv_parser.cpp (renamed from yt/yt/client/formats/schemaful_dsv_parser.cpp)3
-rw-r--r--yt/yt/library/formats/schemaful_dsv_parser.h (renamed from yt/yt/client/formats/schemaful_dsv_parser.h)4
-rw-r--r--yt/yt/library/formats/schemaful_dsv_writer.cpp (renamed from yt/yt/client/formats/schemaful_dsv_writer.cpp)0
-rw-r--r--yt/yt/library/formats/schemaful_dsv_writer.h (renamed from yt/yt/client/formats/schemaful_dsv_writer.h)5
-rw-r--r--yt/yt/library/formats/schemaful_writer.cpp (renamed from yt/yt/client/formats/schemaful_writer.cpp)0
-rw-r--r--yt/yt/library/formats/schemaful_writer.h (renamed from yt/yt/client/formats/schemaful_writer.h)2
-rw-r--r--yt/yt/library/formats/schemaless_writer_adapter.cpp (renamed from yt/yt/client/formats/schemaless_writer_adapter.cpp)3
-rw-r--r--yt/yt/library/formats/schemaless_writer_adapter.h (renamed from yt/yt/client/formats/schemaless_writer_adapter.h)3
-rw-r--r--yt/yt/library/formats/skiff_parser.cpp (renamed from yt/yt/client/formats/skiff_parser.cpp)4
-rw-r--r--yt/yt/library/formats/skiff_parser.h (renamed from yt/yt/client/formats/skiff_parser.h)4
-rw-r--r--yt/yt/library/formats/skiff_writer.cpp (renamed from yt/yt/client/formats/skiff_writer.cpp)3
-rw-r--r--yt/yt/library/formats/skiff_writer.h (renamed from yt/yt/client/formats/skiff_writer.h)2
-rw-r--r--yt/yt/library/formats/skiff_yson_converter-inl.h (renamed from yt/yt/client/formats/skiff_yson_converter-inl.h)0
-rw-r--r--yt/yt/library/formats/skiff_yson_converter.cpp (renamed from yt/yt/client/formats/skiff_yson_converter.cpp)0
-rw-r--r--yt/yt/library/formats/skiff_yson_converter.h (renamed from yt/yt/client/formats/skiff_yson_converter.h)0
-rw-r--r--yt/yt/library/formats/unversioned_value_yson_writer.cpp (renamed from yt/yt/client/formats/unversioned_value_yson_writer.cpp)0
-rw-r--r--yt/yt/library/formats/unversioned_value_yson_writer.h (renamed from yt/yt/client/formats/unversioned_value_yson_writer.h)6
-rw-r--r--yt/yt/library/formats/versioned_writer.cpp (renamed from yt/yt/client/formats/versioned_writer.cpp)0
-rw-r--r--yt/yt/library/formats/versioned_writer.h (renamed from yt/yt/client/formats/versioned_writer.h)2
-rw-r--r--yt/yt/library/formats/web_json_writer.cpp (renamed from yt/yt/client/formats/web_json_writer.cpp)5
-rw-r--r--yt/yt/library/formats/web_json_writer.h (renamed from yt/yt/client/formats/web_json_writer.h)5
-rw-r--r--yt/yt/library/formats/ya.make48
-rw-r--r--yt/yt/library/formats/yamr_parser.cpp (renamed from yt/yt/client/formats/yamr_parser.cpp)0
-rw-r--r--yt/yt/library/formats/yamr_parser.h (renamed from yt/yt/client/formats/yamr_parser.h)4
-rw-r--r--yt/yt/library/formats/yamr_parser_base.cpp (renamed from yt/yt/client/formats/yamr_parser_base.cpp)3
-rw-r--r--yt/yt/library/formats/yamr_parser_base.h (renamed from yt/yt/client/formats/yamr_parser_base.h)2
-rw-r--r--yt/yt/library/formats/yamr_writer.cpp (renamed from yt/yt/client/formats/yamr_writer.cpp)0
-rw-r--r--yt/yt/library/formats/yamr_writer.h (renamed from yt/yt/client/formats/yamr_writer.h)5
-rw-r--r--yt/yt/library/formats/yamr_writer_base.cpp (renamed from yt/yt/client/formats/yamr_writer_base.cpp)0
-rw-r--r--yt/yt/library/formats/yamr_writer_base.h (renamed from yt/yt/client/formats/yamr_writer_base.h)5
-rw-r--r--yt/yt/library/formats/yamred_dsv_parser.cpp (renamed from yt/yt/client/formats/yamred_dsv_parser.cpp)0
-rw-r--r--yt/yt/library/formats/yamred_dsv_parser.h (renamed from yt/yt/client/formats/yamred_dsv_parser.h)4
-rw-r--r--yt/yt/library/formats/yamred_dsv_writer.cpp (renamed from yt/yt/client/formats/yamred_dsv_writer.cpp)0
-rw-r--r--yt/yt/library/formats/yamred_dsv_writer.h (renamed from yt/yt/client/formats/yamred_dsv_writer.h)5
-rw-r--r--yt/yt/library/formats/yql_yson_converter.cpp (renamed from yt/yt/client/formats/yql_yson_converter.cpp)0
-rw-r--r--yt/yt/library/formats/yql_yson_converter.h (renamed from yt/yt/client/formats/yql_yson_converter.h)2
-rw-r--r--yt/yt/library/formats/yson_map_to_unversioned_value.cpp (renamed from yt/yt/client/formats/yson_map_to_unversioned_value.cpp)0
-rw-r--r--yt/yt/library/formats/yson_map_to_unversioned_value.h (renamed from yt/yt/client/formats/yson_map_to_unversioned_value.h)2
-rw-r--r--yt/yt/library/formats/yson_parser.cpp (renamed from yt/yt/client/formats/yson_parser.cpp)3
-rw-r--r--yt/yt/library/formats/yson_parser.h (renamed from yt/yt/client/formats/yson_parser.h)2
-rw-r--r--yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h (renamed from yt/yt/client/unittests/logical_type_shortcuts.h)0
113 files changed, 3108 insertions, 14546 deletions
diff --git a/yt/yt/client/driver/driver.cpp b/yt/yt/client/driver/driver.cpp
index be39258e1a..e4e7abfb61 100644
--- a/yt/yt/client/driver/driver.cpp
+++ b/yt/yt/client/driver/driver.cpp
@@ -1,11 +1,11 @@
#include "driver.h"
#include "authentication_commands.h"
+#include "admin_commands.h"
#include "chaos_commands.h"
#include "command.h"
#include "config.h"
#include "cypress_commands.h"
-#include "admin_commands.h"
#include "etc_commands.h"
#include "file_commands.h"
#include "journal_commands.h"
@@ -17,10 +17,10 @@
#include "proxy_discovery_cache.h"
#include "query_commands.h"
-#include <yt/yt/client/api/transaction.h>
+#include <yt/yt/client/api/client_cache.h>
#include <yt/yt/client/api/connection.h>
#include <yt/yt/client/api/sticky_transaction_pool.h>
-#include <yt/yt/client/api/client_cache.h>
+#include <yt/yt/client/api/transaction.h>
#include <yt/yt/client/api/rpc_proxy/connection_impl.h>
@@ -30,8 +30,11 @@
#include <yt/yt/core/tracing/trace_context.h>
+#include <yt/yt/library/formats/format.h>
+
#include <yt/yt/library/tvm/tvm_base.h>
+
namespace NYT::NDriver {
using namespace NYTree;
diff --git a/yt/yt/client/driver/journal_commands.cpp b/yt/yt/client/driver/journal_commands.cpp
index a3a4406e8d..ad6944fa3d 100644
--- a/yt/yt/client/driver/journal_commands.cpp
+++ b/yt/yt/client/driver/journal_commands.cpp
@@ -10,6 +10,8 @@
#include <yt/yt/client/formats/format.h>
#include <yt/yt/client/formats/parser.h>
+#include <yt/yt/library/formats/format.h>
+
#include <yt/yt/core/concurrency/scheduler.h>
#include <yt/yt/core/misc/blob_output.h>
diff --git a/yt/yt/client/driver/query_commands.cpp b/yt/yt/client/driver/query_commands.cpp
index 756c273175..14efed7825 100644
--- a/yt/yt/client/driver/query_commands.cpp
+++ b/yt/yt/client/driver/query_commands.cpp
@@ -4,6 +4,8 @@
#include <yt/yt/client/formats/config.h>
+#include <yt/yt/library/formats/format.h>
+
#include <yt/yt/core/ytree/fluent.h>
#include <yt/yt/core/ytree/convert.h>
diff --git a/yt/yt/client/driver/queue_commands.cpp b/yt/yt/client/driver/queue_commands.cpp
index 771fce09e1..89fbba7389 100644
--- a/yt/yt/client/driver/queue_commands.cpp
+++ b/yt/yt/client/driver/queue_commands.cpp
@@ -3,6 +3,8 @@
#include <yt/yt/client/api/config.h>
+#include <yt/yt/library/formats/format.h>
+
namespace NYT::NDriver {
using namespace NConcurrency;
diff --git a/yt/yt/client/driver/ya.make b/yt/yt/client/driver/ya.make
index d08c35855e..6e411686d0 100644
--- a/yt/yt/client/driver/ya.make
+++ b/yt/yt/client/driver/ya.make
@@ -25,7 +25,7 @@ SRCS(
PEERDIR(
yt/yt/client
- yt/yt/client/formats
+ yt/yt/library/formats
)
END()
diff --git a/yt/yt/client/formats/format.cpp b/yt/yt/client/formats/format.cpp
index 72db758f73..f6090edb62 100644
--- a/yt/yt/client/formats/format.cpp
+++ b/yt/yt/client/formats/format.cpp
@@ -1,22 +1,4 @@
#include "format.h"
-#include "parser.h"
-#include "dsv_parser.h"
-#include "dsv_writer.h"
-#include "protobuf_parser.h"
-#include "protobuf_writer.h"
-#include "schemaful_dsv_parser.h"
-#include "schemaful_dsv_writer.h"
-#include "schemaful_writer.h"
-#include "web_json_writer.h"
-#include "schemaless_writer_adapter.h"
-#include "skiff_parser.h"
-#include "skiff_writer.h"
-#include "versioned_writer.h"
-#include "yamred_dsv_parser.h"
-#include "yamred_dsv_writer.h"
-#include "yamr_parser.h"
-#include "yamr_writer.h"
-#include "yson_parser.h"
#include <yt/yt/client/table_client/name_table.h>
#include <yt/yt/client/table_client/table_consumer.h>
@@ -36,13 +18,8 @@
namespace NYT::NFormats {
-using namespace NConcurrency;
using namespace NYTree;
using namespace NYson;
-using namespace NJson;
-using namespace NTableClient;
-using namespace NSkiffExt;
-using namespace NComplexTypes;
////////////////////////////////////////////////////////////////////////////////
@@ -109,551 +86,4 @@ void Deserialize(TFormat& value, NYson::TYsonPullParserCursor* cursor)
////////////////////////////////////////////////////////////////////////////////
-namespace {
-
-EYsonType DataTypeToYsonType(EDataType dataType)
-{
- switch (dataType) {
- case EDataType::Structured:
- return EYsonType::Node;
- case EDataType::Tabular:
- return EYsonType::ListFragment;
- default:
- THROW_ERROR_EXCEPTION("Data type %Qlv is not supported by YSON",
- dataType);
- }
-}
-
-std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForYson(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IZeroCopyOutput* output)
-{
- auto config = ConvertTo<TYsonFormatConfigPtr>(&attributes);
- return CreateYsonWriter(
- output,
- config->Format,
- DataTypeToYsonType(dataType),
- config->Format == EYsonFormat::Binary);
-}
-
-std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForJson(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IOutputStream* output)
-{
- auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
- return CreateJsonConsumer(output, DataTypeToYsonType(dataType), config);
-}
-
-std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForDsv(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IOutputStream* output)
-{
- auto config = ConvertTo<TDsvFormatConfigPtr>(&attributes);
- switch (dataType) {
- case EDataType::Structured:
- return std::unique_ptr<IFlushableYsonConsumer>(new TDsvNodeConsumer(output, config));
-
- case EDataType::Tabular:
- case EDataType::Binary:
- case EDataType::Null:
- THROW_ERROR_EXCEPTION("Data type %Qlv is not supported by DSV",
- dataType);
-
- default:
- YT_ABORT();
- };
-}
-
-class TTableParserAdapter
- : public IParser
-{
-public:
- TTableParserAdapter(
- const TFormat& format,
- std::vector<IValueConsumer*> valueConsumers,
- int tableIndex)
- : TableConsumer_(new TTableConsumer(
- TYsonConverterConfig{
- .ComplexTypeMode = format.Attributes().Get("complex_type_mode", EComplexTypeMode::Named),
- .StringKeyedDictMode = format.Attributes().Get("string_keyed_dict_mode", EDictMode::Positional),
- .DecimalMode = format.Attributes().Get("decimal_mode", EDecimalMode::Binary),
- .TimeMode = format.Attributes().Get("time_mode", ETimeMode::Binary),
- .UuidMode = format.Attributes().Get("uuid_mode", EUuidMode::Binary),
- },
- valueConsumers,
- tableIndex))
- , Parser_(CreateParserForFormat(
- format,
- EDataType::Tabular,
- TableConsumer_.get()))
- { }
-
- void Read(TStringBuf data) override
- {
- Parser_->Read(data);
- }
-
- void Finish() override
- {
- Parser_->Finish();
- }
-
-private:
- const std::unique_ptr<IYsonConsumer> TableConsumer_;
- const std::unique_ptr<IParser> Parser_;
-};
-
-} // namespace
-
-std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForFormat(
- const TFormat& format,
- EDataType dataType,
- IZeroCopyOutput* output)
-{
- switch (format.GetType()) {
- case EFormatType::Yson:
- return CreateConsumerForYson(dataType, format.Attributes(), output);
- case EFormatType::Json:
- return CreateConsumerForJson(dataType, format.Attributes(), output);
- case EFormatType::Dsv:
- return CreateConsumerForDsv(dataType, format.Attributes(), output);
- default:
- THROW_ERROR_EXCEPTION("Unsupported output format %Qlv",
- format.GetType());
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-template <class TWriter, class TConsumerAdapter>
-TIntrusivePtr<TWriter> CreateAdaptedWriterForYson(
- const IAttributeDictionary& attributes,
- TTableSchemaPtr schema,
- IAsyncOutputStreamPtr output)
-{
- auto config = ConvertTo<TYsonFormatConfigPtr>(&attributes);
- return New<TConsumerAdapter>(std::move(output), std::move(schema), [=] (IZeroCopyOutput* buffer) {
- if (config->Format == EYsonFormat::Binary) {
- return std::unique_ptr<IFlushableYsonConsumer>(new TBufferedBinaryYsonWriter(
- buffer,
- EYsonType::ListFragment,
- true));
- } else {
- return std::unique_ptr<IFlushableYsonConsumer>(new TYsonWriter(
- buffer,
- config->Format,
- EYsonType::ListFragment));
- }
- });
-}
-
-template <class TWriter, class TConsumerAdapter>
-TIntrusivePtr<TWriter> CreateAdaptedWriterForJson(
- const IAttributeDictionary& attributes,
- TTableSchemaPtr schema,
- IAsyncOutputStreamPtr output)
-{
- auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
- return New<TConsumerAdapter>(std::move(output), std::move(schema), [&] (IOutputStream* buffer) {
- return CreateJsonConsumer(buffer, EYsonType::ListFragment, config);
- });
-}
-
-IUnversionedRowsetWriterPtr CreateSchemafulWriterForFormat(
- const TFormat& format,
- TTableSchemaPtr schema,
- IAsyncOutputStreamPtr output)
-{
- switch (format.GetType()) {
- case EFormatType::Yson:
- return CreateAdaptedWriterForYson<IUnversionedRowsetWriter, TSchemafulWriter>(format.Attributes(), std::move(schema), std::move(output));
- case EFormatType::Json:
- return CreateAdaptedWriterForJson<IUnversionedRowsetWriter, TSchemafulWriter>(format.Attributes(), std::move(schema), std::move(output));
- case EFormatType::SchemafulDsv:
- return CreateSchemafulWriterForSchemafulDsv(format.Attributes(), std::move(schema), std::move(output));
- case EFormatType::WebJson: {
- auto webJsonFormatConfig = ConvertTo<TWebJsonFormatConfigPtr>(&format.Attributes());
- webJsonFormatConfig->SkipSystemColumns = false;
-
- return CreateWriterForWebJson(
- std::move(webJsonFormatConfig),
- TNameTable::FromSchema(*schema),
- {schema},
- std::move(output));
- }
- default:
- THROW_ERROR_EXCEPTION("Unsupported output format %Qlv",
- format.GetType());
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-IVersionedWriterPtr CreateVersionedWriterForFormat(
- const TFormat& format,
- NTableClient::TTableSchemaPtr schema,
- NConcurrency::IAsyncOutputStreamPtr output)
-{
- switch (format.GetType()) {
- case EFormatType::Yson:
- return CreateAdaptedWriterForYson<IVersionedWriter, TVersionedWriter>(format.Attributes(), std::move(schema), std::move(output));
- case EFormatType::Json:
- return CreateAdaptedWriterForJson<IVersionedWriter, TVersionedWriter>(format.Attributes(), std::move(schema), std::move(output));
- default:
- THROW_ERROR_EXCEPTION("Unsupported output format %Qlv", format.GetType());
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-ISchemalessFormatWriterPtr CreateStaticTableWriterForFormat(
- const TFormat& format,
- TNameTablePtr nameTable,
- const std::vector<TTableSchemaPtr>& tableSchemas,
- NConcurrency::IAsyncOutputStreamPtr output,
- bool enableContextSaving,
- TControlAttributesConfigPtr controlAttributesConfig,
- int keyColumnCount)
-{
- switch (format.GetType()) {
- case EFormatType::Dsv:
- return CreateSchemalessWriterForDsv(
- format.Attributes(),
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- case EFormatType::Yamr:
- return CreateSchemalessWriterForYamr(
- format.Attributes(),
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- case EFormatType::YamredDsv:
- return CreateSchemalessWriterForYamredDsv(
- format.Attributes(),
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- case EFormatType::SchemafulDsv:
- return CreateSchemalessWriterForSchemafulDsv(
- format.Attributes(),
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- case EFormatType::Protobuf:
- return CreateWriterForProtobuf(
- format.Attributes(),
- tableSchemas,
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- case EFormatType::WebJson:
- return CreateWriterForWebJson(
- format.Attributes(),
- nameTable,
- tableSchemas,
- std::move(output));
- case EFormatType::Skiff:
- return CreateWriterForSkiff(
- format.Attributes(),
- nameTable,
- tableSchemas,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- default:
- auto adapter = New<TSchemalessWriterAdapter>(
- nameTable,
- std::move(output),
- enableContextSaving,
- controlAttributesConfig,
- keyColumnCount);
- adapter->Init(tableSchemas, format);
- return adapter;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TYsonProducer CreateProducerForDsv(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IInputStream* input)
-{
- if (dataType != EDataType::Tabular) {
- THROW_ERROR_EXCEPTION("DSV is supported only for tabular data");
- }
- auto config = ConvertTo<TDsvFormatConfigPtr>(&attributes);
- return BIND([=] (IYsonConsumer* consumer) {
- ParseDsv(input, consumer, config);
- });
-}
-
-TYsonProducer CreateProducerForYamr(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IInputStream* input)
-{
- if (dataType != EDataType::Tabular) {
- THROW_ERROR_EXCEPTION("YAMR is supported only for tabular data");
- }
- auto config = ConvertTo<TYamrFormatConfigPtr>(&attributes);
- return BIND([=] (IYsonConsumer* consumer) {
- ParseYamr(input, consumer, config);
- });
-}
-
-TYsonProducer CreateProducerForYamredDsv(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IInputStream* input)
-{
- if (dataType != EDataType::Tabular) {
- THROW_ERROR_EXCEPTION("Yamred DSV is supported only for tabular data");
- }
- auto config = ConvertTo<TYamredDsvFormatConfigPtr>(&attributes);
- return BIND([=] (IYsonConsumer* consumer) {
- ParseYamredDsv(input, consumer, config);
- });
-}
-
-TYsonProducer CreateProducerForSchemafulDsv(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IInputStream* input)
-{
- if (dataType != EDataType::Tabular) {
- THROW_ERROR_EXCEPTION("Schemaful DSV is supported only for tabular data");
- }
- auto config = ConvertTo<TSchemafulDsvFormatConfigPtr>(&attributes);
- return BIND([=] (IYsonConsumer* consumer) {
- ParseSchemafulDsv(input, consumer, config);
- });
-}
-
-TYsonProducer CreateProducerForJson(
- EDataType dataType,
- const IAttributeDictionary& attributes,
- IInputStream* input)
-{
- auto ysonType = DataTypeToYsonType(dataType);
- auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
- return BIND([=] (IYsonConsumer* consumer) {
- ParseJson(input, consumer, config, ysonType);
- });
-}
-
-TYsonProducer CreateProducerForYson(EDataType dataType, IInputStream* input)
-{
- auto ysonType = DataTypeToYsonType(dataType);
- return ConvertToProducer(TYsonInput(input, ysonType));
-}
-
-TYsonProducer CreateProducerForFormat(const TFormat& format, EDataType dataType, IInputStream* input)
-{
- switch (format.GetType()) {
- case EFormatType::Yson:
- return CreateProducerForYson(dataType, input);
- case EFormatType::Json:
- return CreateProducerForJson(dataType, format.Attributes(), input);
- case EFormatType::Dsv:
- return CreateProducerForDsv(dataType, format.Attributes(), input);
- case EFormatType::Yamr:
- return CreateProducerForYamr(dataType, format.Attributes(), input);
- case EFormatType::YamredDsv:
- return CreateProducerForYamredDsv(dataType, format.Attributes(), input);
- case EFormatType::SchemafulDsv:
- return CreateProducerForSchemafulDsv(dataType, format.Attributes(), input);
- default:
- THROW_ERROR_EXCEPTION("Unsupported input format %Qlv",
- format.GetType());
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-template<class TBase>
-struct TParserAdapter
- : public TBase
- , public IParser
-{
-public:
- template<class... TArgs>
- TParserAdapter(TArgs&&... args)
- : TBase(std::forward<TArgs>(args)...)
- { }
-
- void Read(TStringBuf data) override
- {
- TBase::Read(data);
- }
-
- void Finish() override
- {
- TBase::Finish();
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::unique_ptr<IParser> CreateParserForFormat(const TFormat& format, EDataType dataType, IYsonConsumer* consumer)
-{
- switch (format.GetType()) {
- case EFormatType::Yson:
- return CreateParserForYson(consumer, DataTypeToYsonType(dataType));
- case EFormatType::Json: {
- auto config = ConvertTo<TJsonFormatConfigPtr>(&format.Attributes());
- return std::unique_ptr<IParser>(new TParserAdapter<TJsonParser>(consumer, config, DataTypeToYsonType(dataType)));
- }
- case EFormatType::Dsv: {
- auto config = ConvertTo<TDsvFormatConfigPtr>(&format.Attributes());
- return CreateParserForDsv(consumer, config);
- }
- case EFormatType::Yamr: {
- auto config = ConvertTo<TYamrFormatConfigPtr>(&format.Attributes());
- return CreateParserForYamr(consumer, config);
- }
- case EFormatType::YamredDsv: {
- auto config = ConvertTo<TYamredDsvFormatConfigPtr>(&format.Attributes());
- return CreateParserForYamredDsv(consumer, config);
- }
- case EFormatType::SchemafulDsv: {
- auto config = ConvertTo<TSchemafulDsvFormatConfigPtr>(&format.Attributes());
- return CreateParserForSchemafulDsv(consumer, config);
- }
- default:
- THROW_ERROR_EXCEPTION("Unsupported input format %Qlv",
- format.GetType());
- }
-}
-
-std::vector<std::unique_ptr<IParser>> CreateParsersForFormat(
- const TFormat& format,
- const std::vector<IValueConsumer*>& valueConsumers)
-{
- std::vector<std::unique_ptr<IParser>> parsers;
-
- auto parserCount = std::ssize(valueConsumers);
- parsers.reserve(parserCount);
-
- switch (format.GetType()) {
- case EFormatType::Protobuf: {
- auto config = ConvertTo<TProtobufFormatConfigPtr>(&format.Attributes());
- // TODO(max42): implementation of CreateParserForProtobuf clones config
- // on each call, so this loop works in quadratic time. Fix that.
- for (int tableIndex = 0; tableIndex < parserCount; ++tableIndex) {
- parsers.emplace_back(CreateParserForProtobuf(valueConsumers[tableIndex], config, tableIndex));
- }
- break;
- }
- case EFormatType::Skiff: {
- auto config = ConvertTo<TSkiffFormatConfigPtr>(&format.Attributes());
- auto skiffSchemas = ParseSkiffSchemas(config->SkiffSchemaRegistry, config->TableSkiffSchemas);
- for (int tableIndex = 0; tableIndex < parserCount; ++tableIndex) {
- parsers.emplace_back(CreateParserForSkiff(valueConsumers[tableIndex], skiffSchemas, config, tableIndex));
- }
- break;
- }
- default:
- for (int tableIndex = 0; tableIndex < parserCount; ++tableIndex) {
- parsers.emplace_back(std::make_unique<TTableParserAdapter>(format, valueConsumers, tableIndex));
- }
- break;
- }
-
- return parsers;
-}
-
-std::unique_ptr<IParser> CreateParserForFormat(
- const TFormat& format,
- IValueConsumer* valueConsumer)
-{
- auto parsers = CreateParsersForFormat(format, {valueConsumer});
- return std::move(parsers.front());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void ConfigureEscapeTable(const TSchemafulDsvFormatConfigPtr& config, TEscapeTable* escapeTable)
-{
- std::vector<char> stopSymbols = {config->RecordSeparator, config->FieldSeparator};
- if (config->EnableEscaping) {
- stopSymbols.push_back(config->EscapingSymbol);
- escapeTable->EscapingSymbol = config->EscapingSymbol;
- }
- escapeTable->FillStops(stopSymbols);
-}
-
-void ConfigureEscapeTables(
- const TDsvFormatConfigBasePtr& config,
- bool addCarriageReturn,
- TEscapeTable* keyEscapeTable,
- TEscapeTable* valueEscapeTable)
-{
- std::vector<char> stopSymbols = {config->RecordSeparator, config->FieldSeparator, '\0'};
-
- if (config->EnableEscaping) {
- stopSymbols.push_back(config->EscapingSymbol);
- keyEscapeTable->EscapingSymbol = valueEscapeTable->EscapingSymbol = config->EscapingSymbol;
- }
-
- if (addCarriageReturn) {
- stopSymbols.push_back('\r');
- }
-
- valueEscapeTable->FillStops(stopSymbols);
-
- stopSymbols.push_back(config->KeyValueSeparator);
- keyEscapeTable->FillStops(stopSymbols);
-}
-
-void ConfigureEscapeTables(
- const TYamrFormatConfigBasePtr& config,
- bool enableKeyEscaping,
- bool enableValueEscaping,
- bool escapingForWriter,
- TEscapeTable* keyEscapeTable,
- TEscapeTable* valueEscapeTable)
-{
- std::vector<char> valueStopSymbols = {config->RecordSeparator};
- std::vector<char> keyStopSymbols = {config->RecordSeparator, config->FieldSeparator};
-
- if (enableKeyEscaping) {
- if (escapingForWriter) {
- keyStopSymbols.push_back('\0');
- keyStopSymbols.push_back('\r');
- }
- keyStopSymbols.push_back(config->EscapingSymbol);
- keyEscapeTable->EscapingSymbol = config->EscapingSymbol;
- }
-
- if (enableValueEscaping) {
- if (escapingForWriter) {
- valueStopSymbols.push_back('\0');
- valueStopSymbols.push_back('\r');
- }
- valueStopSymbols.push_back(config->EscapingSymbol);
- valueEscapeTable->EscapingSymbol = config->EscapingSymbol;
- }
-
- keyEscapeTable->FillStops(keyStopSymbols);
- valueEscapeTable->FillStops(valueStopSymbols);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
} // namespace NYT::NFormats
diff --git a/yt/yt/client/formats/format.h b/yt/yt/client/formats/format.h
index 752939aebe..749f428471 100644
--- a/yt/yt/client/formats/format.h
+++ b/yt/yt/client/formats/format.h
@@ -41,92 +41,4 @@ void Deserialize(TFormat& value, NYson::TYsonPullParserCursor* cursor);
////////////////////////////////////////////////////////////////////////////////
-struct ISchemalessFormatWriter
- : public NTableClient::IUnversionedRowsetWriter
-{
- virtual TBlob GetContext() const = 0;
-
- virtual i64 GetWrittenSize() const = 0;
-
- [[nodiscard]] virtual TFuture<void> Flush() = 0;
-
- virtual bool WriteBatch(NTableClient::IUnversionedRowBatchPtr rowBatch) = 0;
-};
-
-DEFINE_REFCOUNTED_TYPE(ISchemalessFormatWriter)
-
-////////////////////////////////////////////////////////////////////////////////
-
-// This function historically creates format for reading dynamic tables.
-// It slightly differs from format for static tables. :(
-NTableClient::IUnversionedRowsetWriterPtr CreateSchemafulWriterForFormat(
- const TFormat& Format,
- NTableClient::TTableSchemaPtr schema,
- NConcurrency::IAsyncOutputStreamPtr output);
-
-////////////////////////////////////////////////////////////////////////////////
-
-NTableClient::IVersionedWriterPtr CreateVersionedWriterForFormat(
- const TFormat& Format,
- NTableClient::TTableSchemaPtr schema,
- NConcurrency::IAsyncOutputStreamPtr output);
-
-////////////////////////////////////////////////////////////////////////////////
-
-ISchemalessFormatWriterPtr CreateStaticTableWriterForFormat(
- const TFormat& format,
- NTableClient::TNameTablePtr nameTable,
- const std::vector<NTableClient::TTableSchemaPtr>& tableSchemas,
- NConcurrency::IAsyncOutputStreamPtr output,
- bool enableContextSaving,
- TControlAttributesConfigPtr controlAttributesConfig,
- int keyColumnCount);
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::unique_ptr<NYson::IFlushableYsonConsumer> CreateConsumerForFormat(
- const TFormat& format,
- EDataType dataType,
- IZeroCopyOutput* output);
-
-NYson::TYsonProducer CreateProducerForFormat(
- const TFormat& format,
- EDataType dataType,
- IInputStream* input);
-
-std::unique_ptr<IParser> CreateParserForFormat(
- const TFormat& format,
- EDataType dataType,
- NYson::IYsonConsumer* consumer);
-
-//! Create own parser for each value consumer.
-std::vector<std::unique_ptr<IParser>> CreateParsersForFormat(
- const TFormat& format,
- const std::vector<NTableClient::IValueConsumer*>& valueConsumers);
-
-//! Create parser for value consumer. Helper for previous method in singular case.
-std::unique_ptr<IParser> CreateParserForFormat(
- const TFormat& format,
- NTableClient::IValueConsumer* valueConsumer);
-
-////////////////////////////////////////////////////////////////////////////////
-
-void ConfigureEscapeTable(const TSchemafulDsvFormatConfigPtr& config, TEscapeTable* escapeTable);
-
-void ConfigureEscapeTables(
- const TDsvFormatConfigBasePtr& config,
- bool addCarriageReturn,
- TEscapeTable* keyEscapeTable,
- TEscapeTable* valueEscapeTable);
-
-void ConfigureEscapeTables(
- const TYamrFormatConfigBasePtr& config,
- bool enableKeyEscaping,
- bool enableValueEscaping,
- bool escapingForWriter,
- TEscapeTable* keyEscapeTable,
- TEscapeTable* valueEscapeTable);
-
-////////////////////////////////////////////////////////////////////////////////
-
} // namespace NYT::NFormats
diff --git a/yt/yt/client/formats/ya.make b/yt/yt/client/formats/ya.make
index 18eb0e8384..14efe5d5c4 100644
--- a/yt/yt/client/formats/ya.make
+++ b/yt/yt/client/formats/ya.make
@@ -4,35 +4,8 @@ INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
SRCS(
config.cpp
- dsv_parser.cpp
- dsv_writer.cpp
- escape.cpp
format.cpp
- helpers.cpp
parser.cpp
- protobuf.cpp
- protobuf_options.cpp
- protobuf_parser.cpp
- protobuf_writer.cpp
- schemaful_dsv_parser.cpp
- schemaful_dsv_writer.cpp
- schemaful_writer.cpp
- schemaless_writer_adapter.cpp
- web_json_writer.cpp
- skiff_parser.cpp
- skiff_yson_converter.cpp
- skiff_writer.cpp
- unversioned_value_yson_writer.cpp
- versioned_writer.cpp
- yamr_parser.cpp
- yamr_parser_base.cpp
- yamr_writer.cpp
- yamr_writer_base.cpp
- yamred_dsv_parser.cpp
- yamred_dsv_writer.cpp
- yson_parser.cpp
- yson_map_to_unversioned_value.cpp
- yql_yson_converter.cpp
)
PEERDIR(
diff --git a/yt/yt/client/table_client/adapters.h b/yt/yt/client/table_client/adapters.h
index b2baeb3486..9cafefb49e 100644
--- a/yt/yt/client/table_client/adapters.h
+++ b/yt/yt/client/table_client/adapters.h
@@ -5,7 +5,7 @@
#include <yt/yt/client/api/table_reader.h>
-#include <yt/yt/client/formats/format.h>
+#include <yt/yt/library/formats/format.h>
#include <yt/yt/core/concurrency/async_stream.h>
diff --git a/yt/yt/client/table_client/unittests/serialization_ut.cpp b/yt/yt/client/table_client/unittests/serialization_ut.cpp
index ed1c64a113..261b24cb1b 100644
--- a/yt/yt/client/table_client/unittests/serialization_ut.cpp
+++ b/yt/yt/client/table_client/unittests/serialization_ut.cpp
@@ -1,6 +1,7 @@
-#include <yt/yt/client/formats/format.h>
#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/library/formats/format.h>
+
#include <yt/yt/core/misc/blob_output.h>
#include <yt/yt/core/ytree/convert.h>
#include <yt/yt/core/test_framework/framework.h>
diff --git a/yt/yt/client/table_client/unittests/ya.make b/yt/yt/client/table_client/unittests/ya.make
index 87dff43ad2..d0a82ab469 100644
--- a/yt/yt/client/table_client/unittests/ya.make
+++ b/yt/yt/client/table_client/unittests/ya.make
@@ -14,7 +14,7 @@ INCLUDE(${ARCADIA_ROOT}/yt/opensource_tests.inc)
PEERDIR(
yt/yt/client
- yt/yt/client/formats
+ yt/yt/library/formats
yt/yt/client/table_client/unittests/helpers
yt/yt/client/unittests/mock
yt/yt/core/test_framework
diff --git a/yt/yt/client/unittests/check_type_compatibility_ut.cpp b/yt/yt/client/unittests/check_type_compatibility_ut.cpp
index 488ee42800..57b91cb3ea 100644
--- a/yt/yt/client/unittests/check_type_compatibility_ut.cpp
+++ b/yt/yt/client/unittests/check_type_compatibility_ut.cpp
@@ -1,4 +1,4 @@
-#include "logical_type_shortcuts.h"
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
#include <yt/yt/core/test_framework/framework.h>
diff --git a/yt/yt/client/unittests/dsv_parser_ut.cpp b/yt/yt/client/unittests/dsv_parser_ut.cpp
deleted file mode 100644
index 0a0c724f9e..0000000000
--- a/yt/yt/client/unittests/dsv_parser_ut.cpp
+++ /dev/null
@@ -1,365 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/test_framework/yson_consumer_mock.h>
-
-#include <yt/yt/client/formats/dsv_parser.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NYson;
-
-using ::testing::InSequence;
-using ::testing::StrictMock;
-using ::testing::NiceMock;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TDsvParserTest, Simple)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("integer"));
- EXPECT_CALL(Mock, OnStringScalar("42"));
- EXPECT_CALL(Mock, OnKeyedItem("string"));
- EXPECT_CALL(Mock, OnStringScalar("some"));
- EXPECT_CALL(Mock, OnKeyedItem("double"));
- EXPECT_CALL(Mock, OnStringScalar("10"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("foo"));
- EXPECT_CALL(Mock, OnStringScalar("bar"));
- EXPECT_CALL(Mock, OnKeyedItem("one"));
- EXPECT_CALL(Mock, OnStringScalar("1"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "integer=42\tstring=some\tdouble=10\n"
- "foo=bar\tone=1\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, EmptyInput)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- TString input = "";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, BinaryData)
-{
- StrictMock<TMockYsonConsumer> Mock;
-
- auto a = TString("\0\0\0\0", 4);
- auto b = TString("\x80\0\x16\xC8", 4);
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("ntr"));
- EXPECT_CALL(Mock, OnStringScalar(a));
- EXPECT_CALL(Mock, OnKeyedItem("xrp"));
- EXPECT_CALL(Mock, OnStringScalar(b));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "ntr=\\0\\0\\0\\0\txrp=\x80\\0\x16\xC8\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, EmptyRecord)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, EmptyRecords)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "\n\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, EmptyKeysAndValues)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem(""));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "=\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, UnescapedZeroInInput)
-{
- StrictMock<TMockYsonConsumer> Mock;
-
- TString input = TString("a\0b=v", 5);
- EXPECT_ANY_THROW(
- ParseDsv(input, &Mock);
- );
-}
-
-TEST(TDsvParserTest, ZerosAreNotTerminals)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- TString key = TString("a\0b", 3);
- TString value = TString("c\0d", 3);
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem(key));
- EXPECT_CALL(Mock, OnStringScalar(value));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "a\\0b=c\\0d\n";
- ParseDsv(input, &Mock);
-}
-
-TEST(TDsvParserTest, UnterminatedRecord)
-{
- NiceMock<TMockYsonConsumer> Mock;
-
- TString input = "a=b";
- EXPECT_ANY_THROW(
- ParseDsv(input, &Mock);
- );
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TTskvParserTest: public ::testing::Test
-{
-public:
- StrictMock<TMockYsonConsumer> Mock;
- NiceMock<TMockYsonConsumer> ErrorMock;
-
- TDsvFormatConfigPtr Config;
-
- void SetUp() override {
- Config = New<TDsvFormatConfig>();
- Config->LinePrefix = "tskv";
- }
-};
-
-TEST_F(TTskvParserTest, Simple)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("id"));
- EXPECT_CALL(Mock, OnStringScalar("1"));
- EXPECT_CALL(Mock, OnKeyedItem("guid"));
- EXPECT_CALL(Mock, OnStringScalar("100500"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("id"));
- EXPECT_CALL(Mock, OnStringScalar("2"));
- EXPECT_CALL(Mock, OnKeyedItem("guid"));
- EXPECT_CALL(Mock, OnStringScalar("20025"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "tskv\n"
- "tskv\tid=1\tguid=100500\t\n"
- "tskv\tid=2\tguid=20025\n";
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, SimpleWithNewLine)
-{
- InSequence dummy;
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("foo"));
- EXPECT_CALL(Mock, OnStringScalar("bar"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "tskv\tfoo=bar\n";
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, Escaping)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a=b"));
- EXPECT_CALL(Mock, OnStringScalar("c=d or e=f"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key_with_\t,\r_and_\n"));
- EXPECT_CALL(Mock, OnStringScalar("value_with_\t,\\_and_\r\n"));
- EXPECT_CALL(Mock, OnKeyedItem("another_key"));
- EXPECT_CALL(Mock, OnStringScalar("another_value"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "t\\s\\kv\n"
- "tskv" "\t" "a\\=b" "=" "c\\=d or e=f" "\n" // Note: unescaping is less strict
- "tskv" "\t"
- "key_with_\\t,\r_and_\\n"
- "="
- "value_with_\\t,\\\\_and_\\r\\n"
- "\t"
- "an\\other_\\key=anoth\\er_v\\alue"
- "\n";
-
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, DisabledEscaping)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a\\"));
- EXPECT_CALL(Mock, OnStringScalar("b\\t=c\\=d or e=f\\0"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "tskv\t\\x\\y\n"
- "tskv" "\t" "a\\=b\\t" "=" "c\\=d or e=f\\0" "\n";
-
- Config->EnableEscaping = false;
-
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, AllowedUnescapedSymbols)
-{
- Config->LinePrefix = "prefix_with_=";
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("just_key"));
- EXPECT_CALL(Mock, OnStringScalar("value_with_="));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "prefix_with_=" "\t" "just_key" "=" "value_with_=" "\n";
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, UndefinedValues)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("b"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "tskv" "\t" "tskv" "\t" "tskv" "\n"
- "tskv\t" "some_key" "\t\t\t" "a=b" "\t" "another_key" "\n" // Note: consequent \t
- "tskv\n";
- ParseDsv(input, &Mock, Config);
-}
-
-
-TEST_F(TTskvParserTest, OnlyLinePrefix)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "tskv\n";
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, OnlyLinePrefixAndTab)
-{
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "tskv\t\n";
- ParseDsv(input, &Mock, Config);
-}
-
-TEST_F(TTskvParserTest, NotFinishedLinePrefix)
-{
- TString input = "tsk";
-
- EXPECT_ANY_THROW(
- ParseDsv(input, &ErrorMock, Config)
- );
-}
-
-TEST_F(TTskvParserTest, WrongLinePrefix)
-{
- TString input =
- "tskv\ta=b\n"
- "tZkv\tc=d\te=f\n"
- "tskv\ta=b\n";
-
- EXPECT_ANY_THROW(
- ParseDsv(input, &ErrorMock, Config);
- );
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NDriver
diff --git a/yt/yt/client/unittests/dsv_writer_ut.cpp b/yt/yt/client/unittests/dsv_writer_ut.cpp
deleted file mode 100644
index b5f96caacd..0000000000
--- a/yt/yt/client/unittests/dsv_writer_ut.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/formats/dsv_parser.h>
-#include <yt/yt/client/formats/dsv_writer.h>
-
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NConcurrency;
-using namespace NTableClient;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TDsvWriterTest, StringScalar)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnStringScalar("0-2-xb-1234");
- EXPECT_EQ("0-2-xb-1234", outputStream.Str());
-}
-
-TEST(TDsvWriterTest, ListContainingDifferentTypes)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnBeginList();
- consumer.OnListItem();
- consumer.OnInt64Scalar(100);
- consumer.OnListItem();
- consumer.OnStringScalar("foo");
- consumer.OnListItem();
- consumer.OnListItem();
- consumer.OnBeginMap();
- consumer.OnKeyedItem("a");
- consumer.OnStringScalar("10");
- consumer.OnKeyedItem("b");
- consumer.OnStringScalar("c");
- consumer.OnEndMap();
- consumer.OnEndList();
-
- TString output =
- "100\n"
- "foo\n"
- "\n"
- "a=10\tb=c\n";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-TEST(TDsvWriterTest, ListInsideList)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnBeginList();
- consumer.OnListItem();
- EXPECT_ANY_THROW(consumer.OnBeginList());
-}
-
-TEST(TDsvWriterTest, ListInsideMap)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnBeginMap();
- consumer.OnKeyedItem("foo");
- EXPECT_ANY_THROW(consumer.OnBeginList());
-}
-
-TEST(TDsvWriterTest, MapInsideMap)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnBeginMap();
- consumer.OnKeyedItem("foo");
- EXPECT_ANY_THROW(consumer.OnBeginMap());
-}
-
-TEST(TDsvWriterTest, WithoutEsacping)
-{
- auto config = New<TDsvFormatConfig>();
- config->EnableEscaping = false;
-
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream, config);
-
- consumer.OnStringScalar("string_with_\t_\\_=_and_\n");
-
- TString output = "string_with_\t_\\_=_and_\n";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-TEST(TDsvWriterTest, ListUsingOnRaw)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnRaw("[10; 20; 30]", EYsonType::Node);
- TString output =
- "10\n"
- "20\n"
- "30\n";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-TEST(TDsvWriterTest, MapUsingOnRaw)
-{
- TStringStream outputStream;
- TDsvNodeConsumer consumer(&outputStream);
-
- consumer.OnRaw("{a=b; c=d}", EYsonType::Node);
- TString output = "a=b\tc=d";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TDsvWriterTest, SimpleTabular)
-{
- auto nameTable = New<TNameTable>();
- auto integerId = nameTable->RegisterName("integer");
- auto stringId = nameTable->RegisterName("string");
- auto doubleId = nameTable->RegisterName("double");
- auto fooId = nameTable->RegisterName("foo");
- auto oneId = nameTable->RegisterName("one");
- auto tableIndexId = nameTable->RegisterName(TableIndexColumnName);
- auto rowIndexId = nameTable->RegisterName(RowIndexColumnName);
- auto rangeIndexId = nameTable->RegisterName(RangeIndexColumnName);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedInt64Value(42, integerId));
- row1.AddValue(MakeUnversionedStringValue("some", stringId));
- row1.AddValue(MakeUnversionedDoubleValue(10., doubleId));
- row1.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
- row1.AddValue(MakeUnversionedInt64Value(42, rowIndexId));
- row1.AddValue(MakeUnversionedInt64Value(1, rangeIndexId));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("bar", fooId));
- row2.AddValue(MakeUnversionedSentinelValue(EValueType::Null, integerId));
- row2.AddValue(MakeUnversionedInt64Value(1, oneId));
- row2.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
- row2.AddValue(MakeUnversionedInt64Value(43, rowIndexId));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow()};
-
- TStringStream outputStream;
- auto config = New<TDsvFormatConfig>();
- config->EnableTableIndex = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableTableIndex = true;
- auto writer = CreateSchemalessWriterForDsv(
- config,
- nameTable,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&outputStream)),
- false,
- controlAttributes,
- 0);
-
- EXPECT_EQ(true, writer->Write(rows));
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "integer=42\tstring=some\tdouble=10.\t@table_index=2\n"
- "foo=bar\tone=1\t@table_index=2\n";
- EXPECT_EQ(output, outputStream.Str());
-}
-
-TEST(TDsvWriterTest, AnyTabular)
-{
- auto nameTable = New<TNameTable>();
- auto anyId = nameTable->RegisterName("any");
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedAnyValue("[]", anyId));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- TStringStream outputStream;
- auto controlAttributes = New<TControlAttributesConfig>();
- auto writer = CreateSchemalessWriterForDsv(
- New<TDsvFormatConfig>(),
- nameTable,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&outputStream)),
- false,
- controlAttributes,
- 0);
-
- EXPECT_FALSE(writer->Write(rows));
- EXPECT_ANY_THROW(writer->GetReadyEvent().Get().ThrowOnError());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TTskvWriterTest, SimpleTabular)
-{
- auto nameTable = New<TNameTable>();
- auto id1 = nameTable->RegisterName("id");
- auto id2 = nameTable->RegisterName("guid");
- auto tableIndexId = nameTable->RegisterName(TableIndexColumnName);
- auto rowIndexId = nameTable->RegisterName(RowIndexColumnName);
- auto rangeIndexId = nameTable->RegisterName(RangeIndexColumnName);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedInt64Value(2, tableIndexId));
- row1.AddValue(MakeUnversionedInt64Value(42, rowIndexId));
- row1.AddValue(MakeUnversionedInt64Value(1, rangeIndexId));
-
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("1", id1));
- row2.AddValue(MakeUnversionedInt64Value(100500, id2));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("2", id1));
- row3.AddValue(MakeUnversionedInt64Value(20025, id2));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow(), row3.GetRow() };
-
- TStringStream outputStream;
- auto config = New<TDsvFormatConfig>();
- config->LinePrefix = "tskv";
-
- auto controlAttributes = New<TControlAttributesConfig>();
- auto writer = CreateSchemalessWriterForDsv(
- config,
- nameTable,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&outputStream)),
- false,
- controlAttributes,
- 0);
-
- EXPECT_EQ(true, writer->Write(rows));
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "tskv\n"
- "tskv\tid=1\tguid=100500\n"
- "tskv\tid=2\tguid=20025\n";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-TEST(TTskvWriterTest, Escaping)
-{
- auto key1 = TString("\0 is escaped", 12);
-
- auto nameTable = New<TNameTable>();
- auto id1 = nameTable->RegisterName(key1);
- auto id2 = nameTable->RegisterName("Escaping in in key: \r \t \n \\ =");
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue(key1, id1));
- row.AddValue(MakeUnversionedStringValue("Escaping in value: \r \t \n \\ =", id2));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- TStringStream outputStream;
- auto config = New<TDsvFormatConfig>();
- config->LinePrefix = "tskv";
-
- auto controlAttributes = New<TControlAttributesConfig>();
- auto writer = CreateSchemalessWriterForDsv(
- config,
- nameTable,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&outputStream)),
- false,
- controlAttributes,
- 0);
-
- EXPECT_EQ(true, writer->Write(rows));
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "tskv"
- "\t"
-
- "\\0 is escaped"
- "="
- "\\0 is escaped"
-
- "\t"
-
- "Escaping in in key: \\r \\t \\n \\\\ \\="
- "="
- "Escaping in value: \\r \\t \\n \\\\ =" // Note: = is not escaped
-
- "\n";
-
- EXPECT_EQ(output, outputStream.Str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/format_writer_ut.h b/yt/yt/client/unittests/format_writer_ut.h
deleted file mode 100644
index 4680090755..0000000000
--- a/yt/yt/client/unittests/format_writer_ut.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-#include <yt/yt/client/formats/format.h>
-
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-
-namespace NYT::NFormats {
-namespace {
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TestNameTableExpansion(ISchemalessFormatWriterPtr writer, NTableClient::TNameTablePtr nameTable)
-{
- // We write five rows, on each iteration we double number of
- // columns in the NameTable.
- for (int iteration = 0; iteration < 5; ++iteration) {
- NTableClient::TUnversionedOwningRowBuilder row;
- for (int index = 0; index < (1 << iteration); ++index) {
- auto key = "Column" + ToString(index);
- auto value = "Value" + ToString(index);
- int columnId = nameTable->GetIdOrRegisterName(key);
- row.AddValue(NTableClient::MakeUnversionedStringValue(value, columnId));
- }
- auto completeRow = row.FinishRow();
- EXPECT_EQ(true, writer->Write({completeRow.Get()}));
- }
- writer->Close()
- .Get()
- .ThrowOnError();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/logical_type_ut.cpp b/yt/yt/client/unittests/logical_type_ut.cpp
index 0cf744313e..f6b98ae638 100644
--- a/yt/yt/client/unittests/logical_type_ut.cpp
+++ b/yt/yt/client/unittests/logical_type_ut.cpp
@@ -1,4 +1,4 @@
-#include "logical_type_shortcuts.h"
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
#include <yt/yt/core/test_framework/framework.h>
diff --git a/yt/yt/client/unittests/protobuf_format_ut.cpp b/yt/yt/client/unittests/protobuf_format_ut.cpp
deleted file mode 100644
index af9d2a0155..0000000000
--- a/yt/yt/client/unittests/protobuf_format_ut.cpp
+++ /dev/null
@@ -1,4657 +0,0 @@
-#include "row_helpers.h"
-#include "yson_helpers.h"
-#include "yt/yt/client/table_client/public.h"
-
-#include <yt/yt/client/unittests/protobuf_format_ut.pb.h>
-
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-#include <yt/yt/core/json/json_parser.h>
-#include <yt/yt/core/yson/string.h>
-#include <yt/yt/core/ytree/fluent.h>
-
-#include <yt/yt/client/formats/config.h>
-#include <yt/yt/client/formats/parser.h>
-#include <yt/yt/client/formats/lenval_control_constants.h>
-#include <yt/yt/client/formats/protobuf_writer.h>
-#include <yt/yt/client/formats/protobuf_parser.h>
-#include <yt/yt/client/formats/protobuf.h>
-#include <yt/yt/client/formats/format.h>
-#include <yt/yt/client/table_client/logical_type.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/value_consumer.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-
-#include <yt/yt/library/named_value/named_value.h>
-
-#include <util/random/fast.h>
-
-#include <google/protobuf/text_format.h>
-#include <google/protobuf/descriptor.h>
-#include <google/protobuf/descriptor.pb.h>
-
-using namespace std::string_view_literals;
-
-
-namespace NYT {
-namespace {
-
-using namespace NYson;
-using namespace NYTree;
-using namespace NFormats;
-using namespace NTableClient;
-using namespace NConcurrency;
-using namespace NProtobufFormatTest;
-
-using ::google::protobuf::FileDescriptor;
-using NNamedValue::MakeRow;
-
-////////////////////////////////////////////////////////////////////////////////
-
-DEFINE_ENUM(EProtoFormatType, // Flavours of protobuf format config built by the test helpers in this file.
- (FileDescriptorLegacy) // "file_descriptor_set" + "file_indices" + "message_indices" attributes.
- (FileDescriptor) // "file_descriptor_set_text" + "type_names" attributes.
- (Structured) // Explicit "tables" / "columns" description.
-);
-
-////////////////////////////////////////////////////////////////////////////////
-
-#define EXPECT_NODES_EQUAL(a, b) /* Expects AreNodesEqual(a, b); on failure prints both nodes as text YSON. Each argument appears twice in the expansion. */ \
- EXPECT_TRUE(AreNodesEqual((a), (b))) \
- << #a ": " << ConvertToYsonString((a), EYsonFormat::Text).ToString() \
- << "\n\n" #b ": " << ConvertToYsonString((b), EYsonFormat::Text).ToString();
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Renders |node| as a YSON string in text format.
-TString ConvertToTextYson(const INodePtr& node)
-{
-    auto ysonString = ConvertToYsonString(node, EYsonFormat::Text);
-    return ysonString.ToString();
-}
-
-// Hard-coded serialized file descriptor set used by the legacy ("file_descriptor_set") format description.
-TString FileDescriptorLegacy = "\x0a\xb6\x03\x0a\x29\x6a\x75\x6e\x6b\x2f\x65\x72\x6d\x6f\x6c\x6f\x76\x64\x2f\x74\x65\x73\x74\x2d\x70\x72\x6f\x74\x6f\x62"
- "\x75\x66\x2f\x6d\x65\x73\x73\x61\x67\x65\x2e\x70\x72\x6f\x74\x6f\x22\x2d\x0a\x0f\x54\x45\x6d\x62\x65\x64\x65\x64\x4d\x65\x73\x73\x61\x67\x65\x12"
- "\x0b\x0a\x03\x4b\x65\x79\x18\x01\x20\x01\x28\x09\x12\x0d\x0a\x05\x56\x61\x6c\x75\x65\x18\x02\x20\x01\x28\x09\x22\xb3\x02\x0a\x08\x54\x4d\x65\x73"
- "\x73\x61\x67\x65\x12\x0e\x0a\x06\x44\x6f\x75\x62\x6c\x65\x18\x01\x20\x01\x28\x01\x12\x0d\x0a\x05\x46\x6c\x6f\x61\x74\x18\x02\x20\x01\x28\x02\x12"
- "\x0d\x0a\x05\x49\x6e\x74\x36\x34\x18\x03\x20\x01\x28\x03\x12\x0e\x0a\x06\x55\x49\x6e\x74\x36\x34\x18\x04\x20\x01\x28\x04\x12\x0e\x0a\x06\x53\x49"
- "\x6e\x74\x36\x34\x18\x05\x20\x01\x28\x12\x12\x0f\x0a\x07\x46\x69\x78\x65\x64\x36\x34\x18\x06\x20\x01\x28\x06\x12\x10\x0a\x08\x53\x46\x69\x78\x65"
- "\x64\x36\x34\x18\x07\x20\x01\x28\x10\x12\x0d\x0a\x05\x49\x6e\x74\x33\x32\x18\x08\x20\x01\x28\x05\x12\x0e\x0a\x06\x55\x49\x6e\x74\x33\x32\x18\x09"
- "\x20\x01\x28\x0d\x12\x0e\x0a\x06\x53\x49\x6e\x74\x33\x32\x18\x0a\x20\x01\x28\x11\x12\x0f\x0a\x07\x46\x69\x78\x65\x64\x33\x32\x18\x0b\x20\x01\x28"
- "\x07\x12\x10\x0a\x08\x53\x46\x69\x78\x65\x64\x33\x32\x18\x0c\x20\x01\x28\x0f\x12\x0c\x0a\x04\x42\x6f\x6f\x6c\x18\x0d\x20\x01\x28\x08\x12\x0e\x0a"
- "\x06\x53\x74\x72\x69\x6e\x67\x18\x0e\x20\x01\x28\x09\x12\x0d\x0a\x05\x42\x79\x74\x65\x73\x18\x0f\x20\x01\x28\x0c\x12\x14\x0a\x04\x45\x6e\x75\x6d"
- "\x18\x10\x20\x01\x28\x0e\x32\x06\x2e\x45\x45\x6e\x75\x6d\x12\x21\x0a\x07\x4d\x65\x73\x73\x61\x67\x65\x18\x11\x20\x01\x28\x0b\x32\x10\x2e\x54\x45"
- "\x6d\x62\x65\x64\x65\x64\x4d\x65\x73\x73\x61\x67\x65\x2a\x24\x0a\x05\x45\x45\x6e\x75\x6d\x12\x07\x0a\x03\x4f\x6e\x65\x10\x01\x12\x07\x0a\x03\x54"
- "\x77\x6f\x10\x02\x12\x09\x0a\x05\x54\x68\x72\x65\x65\x10\x03";
-
-// Builds one lenval record: a ui32 length prefix equal to |size| followed by
-// exactly |size| bytes produced by |rng|.
-TString GenerateRandomLenvalString(TFastRng64& rng, ui32 size)
-{
-    TString record;
-    record.append(reinterpret_cast<const char*>(&size), sizeof(size));
-
-    const ui32 totalSize = size + sizeof(ui32);
-
-    // Random bytes arrive in ui64-sized chunks, so the last chunk may overshoot.
-    while (record.size() < totalSize) {
-        const ui64 chunk = rng.GenRand();
-        record.append(reinterpret_cast<const char*>(&chunk), sizeof(chunk));
-    }
-
-    // Cut off whatever the last chunk added beyond the requested length.
-    record.resize(totalSize);
-    return record;
-}
-
-// Builds a protobuf format config for the given message descriptors.
-//
-// The config carries the text dump of a FileDescriptorSet containing the file
-// of every descriptor together with all transitive dependencies
-// ("file_descriptor_set_text"), plus the full names of the messages
-// ("type_names") in the order of |descriptorList|.
-static TProtobufFormatConfigPtr MakeProtobufFormatConfig(const std::vector<const ::google::protobuf::Descriptor*>& descriptorList)
-{
-    ::google::protobuf::FileDescriptorSet fileDescriptorSet;
-    THashSet<const ::google::protobuf::FileDescriptor*> visitedFiles;
-
-    std::function<void(const ::google::protobuf::FileDescriptor*)> visitFile;
-    visitFile = [&] (const ::google::protobuf::FileDescriptor* file) {
-        const bool firstVisit = visitedFiles.insert(file).second;
-        if (!firstVisit) {
-            return;
-        }
-
-        // N.B. Files must appear in fileDescriptorSet in topological order,
-        // so all dependencies are emitted first and only then the file itself.
-        const int dependencyCount = file->dependency_count();
-        for (int dependencyIndex = 0; dependencyIndex < dependencyCount; ++dependencyIndex) {
-            visitFile(file->dependency(dependencyIndex));
-        }
-        file->CopyTo(fileDescriptorSet.add_file());
-    };
-
-    std::vector<TString> typeNames;
-    for (const auto* messageDescriptor : descriptorList) {
-        visitFile(messageDescriptor->file());
-        typeNames.push_back(messageDescriptor->full_name());
-    }
-
-    auto configYson = BuildYsonStringFluently()
-        .BeginMap()
-            .Item("file_descriptor_set_text").Value(fileDescriptorSet.ShortDebugString())
-            .Item("type_names").Value(typeNames)
-        .EndMap();
-
-    return ConvertTo<TProtobufFormatConfigPtr>(configYson);
-}
-
-// Converts a YSON string given as |data| into a node tree.
-INodePtr ParseYson(TStringBuf data)
-{
-    NYson::TYsonString ysonString(TString{data});
-    return ConvertToNode(ysonString);
-}
-
-// Serializes |message| as a single lenval record: a ui32 byte size followed
-// by the serialized message. Throws if serialization fails.
-TString LenvalBytes(const ::google::protobuf::Message& message)
-{
-    TStringStream stream;
-
-    const auto byteSize = static_cast<ui32>(message.ByteSizeLong());
-    stream.Write(&byteSize, sizeof(byteSize));
-
-    const bool serialized = message.SerializeToArcadiaStream(&stream);
-    if (!serialized) {
-        THROW_ERROR_EXCEPTION("Can not serialize message");
-    }
-    return stream.Str();
-}
-
-// Throws unless |actual| is the same value type as |expected|.
-void EnsureTypesMatch(EValueType expected, EValueType actual)
-{
-    if (expected == actual) {
-        return;
-    }
-    THROW_ERROR_EXCEPTION("Mismatching type: expected %Qlv, actual %Qlv",
-        expected,
-        actual);
-}
-
-// Returns the double payload of |row|; throws if the value is not of type Double.
-double GetDouble(const TUnversionedValue& row)
-{
-    EnsureTypesMatch(EValueType::Double, row.Type);
-    const double payload = row.Data.Double;
-    return payload;
-}
-
-// Serializes |message| |count| times as consecutive lenval records, feeds the
-// result to a protobuf parser built from |config| (table index 0) and returns
-// the consumer holding the parsed rows.
-//
-// Throws if the message cannot be serialized or if the number of parsed rows
-// differs from |count|.
-template <typename TMessage>
-TCollectingValueConsumer ParseRows(
-    const TMessage& message,
-    const TProtobufFormatConfigPtr& config,
-    const TTableSchemaPtr& schema = New<TTableSchema>(),
-    int count = 1)
-{
-    TString lenvalBytes;
-    TStringOutput out(lenvalBytes);
-    // ByteSizeLong() is used for consistency with LenvalBytes(); ByteSize() is deprecated.
-    auto messageSize = static_cast<ui32>(message.ByteSizeLong());
-    for (int i = 0; i < count; ++i) {
-        out.Write(&messageSize, sizeof(messageSize));
-        if (!message.SerializeToArcadiaStream(&out)) {
-            THROW_ERROR_EXCEPTION("Failed to serialize message");
-        }
-    }
-
-    TCollectingValueConsumer rowCollector(schema);
-    auto parser = CreateParserForProtobuf(&rowCollector, config, 0);
-    parser->Read(lenvalBytes);
-    parser->Finish();
-    if (static_cast<ssize_t>(rowCollector.Size()) != count) {
-        THROW_ERROR_EXCEPTION("rowCollector has wrong size: expected %v, actual %v",
-            count,
-            rowCollector.Size());
-    }
-    return rowCollector;
-}
-
-// Convenience overload: takes the format as a node whose attributes hold the
-// protobuf config, converts them to a typed config and delegates to the
-// overload above.
-template <typename TMessage>
-TCollectingValueConsumer ParseRows(
-    const TMessage& message,
-    const INodePtr& config,
-    const TTableSchemaPtr& schema = New<TTableSchema>(),
-    int count = 1)
-{
-    auto typedConfig = ConvertTo<TProtobufFormatConfigPtr>(config->Attributes().ToMap());
-    return ParseRows(message, typedConfig, schema, count);
-}
-
-
-// Appends |fileDescriptor| and all of its not yet visited transitive
-// dependencies to |fileDescriptors|, dependencies first. |fileDescriptorSet|
-// tracks the files that have already been visited.
-void AddDependencies(
-    const FileDescriptor* fileDescriptor,
-    std::vector<const FileDescriptor*>& fileDescriptors,
-    THashSet<const FileDescriptor*>& fileDescriptorSet)
-{
-    const bool firstVisit = fileDescriptorSet.insert(fileDescriptor).second;
-    if (!firstVisit) {
-        return;
-    }
-
-    const int dependencyCount = fileDescriptor->dependency_count();
-    for (int dependencyIndex = 0; dependencyIndex < dependencyCount; ++dependencyIndex) {
-        AddDependencies(fileDescriptor->dependency(dependencyIndex), fileDescriptors, fileDescriptorSet);
-    }
-    fileDescriptors.push_back(fileDescriptor);
-}
-
-// Builds a "protobuf" format node for the message types |Ts|.
-//
-// The node attributes carry the text dump of a FileDescriptorSet with the
-// files of all |Ts| and their dependencies ("file_descriptor_set_text"), the
-// full message names ("type_names") and, when given, "complex_type_mode".
-template <typename ... Ts>
-INodePtr CreateFileDescriptorConfig(std::optional<EComplexTypeMode> complexTypeMode = {})
-{
-    const std::vector<const FileDescriptor*> rootFiles = {Ts::descriptor()->file()...};
-
-    // Collect the root files together with their dependencies, dependencies first.
-    std::vector<const FileDescriptor*> orderedFiles;
-    THashSet<const FileDescriptor*> visitedFiles;
-    for (const auto* rootFile : rootFiles) {
-        AddDependencies(rootFile, orderedFiles, visitedFiles);
-    }
-
-    ::google::protobuf::FileDescriptorSet descriptorSetProto;
-    for (const auto* file : orderedFiles) {
-        file->CopyTo(descriptorSetProto.add_file());
-    }
-
-    TString descriptorSetText;
-    ::google::protobuf::TextFormat::Printer().PrintToString(descriptorSetProto, &descriptorSetText);
-
-    const std::vector<TString> typeNames = {Ts::descriptor()->full_name()...};
-
-    return BuildYsonNodeFluently()
-        .BeginAttributes()
-            .Item("file_descriptor_set_text").Value(descriptorSetText)
-            .Item("type_names").Value(typeNames)
-            .OptionalItem("complex_type_mode", complexTypeMode)
-        .EndAttributes()
-        .Value("protobuf");
-}
-
-// "enumerations" section shared by the structured configs: maps the value
-// names of EEnum to their numeric values.
-static const auto EnumerationsConfig = BuildYsonNodeFluently()
-    .BeginMap()
-        .Item("EEnum").BeginMap()
-            .Item("One").Value(1)
-            .Item("Two").Value(2)
-            .Item("Three").Value(3)
-            .Item("MinusFortyTwo").Value(-42)
-            .Item("MaxInt32").Value(std::numeric_limits<int>::max())
-            .Item("MinInt32").Value(std::numeric_limits<int>::min())
-        .EndMap()
-    .EndMap();
-
-// Builds the "protobuf" format node (the config lives in the node attributes)
-// for the all-fields test message in the requested config flavour.
-INodePtr CreateAllFieldsConfig(EProtoFormatType protoFormatType)
-{
-    switch (protoFormatType) {
-        case EProtoFormatType::FileDescriptor:
-            return CreateFileDescriptorConfig<TMessage>();
-
-        case EProtoFormatType::FileDescriptorLegacy:
-            // Serialized descriptor set plus file / message indices.
-            return BuildYsonNodeFluently()
-                .BeginAttributes()
-                    .Item("file_descriptor_set").Value(FileDescriptorLegacy)
-                    .Item("file_indices").BeginList()
-                        .Item().Value(0)
-                    .EndList()
-                    .Item("message_indices").BeginList()
-                        .Item().Value(1)
-                    .EndList()
-                .EndAttributes()
-                .Value("protobuf");
-
-        case EProtoFormatType::Structured:
-            // One table whose columns are listed explicitly.
-            return BuildYsonNodeFluently()
-                .BeginAttributes()
-                    .Item("enumerations").Value(EnumerationsConfig)
-                    .Item("tables").BeginList()
-                        .Item().BeginMap()
-                            .Item("columns").BeginList()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Double")
-                                    .Item("field_number").Value(1)
-                                    .Item("proto_type").Value("double")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Float")
-                                    .Item("field_number").Value(2)
-                                    .Item("proto_type").Value("float")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Int64")
-                                    .Item("field_number").Value(3)
-                                    .Item("proto_type").Value("int64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("UInt64")
-                                    .Item("field_number").Value(4)
-                                    .Item("proto_type").Value("uint64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("SInt64")
-                                    .Item("field_number").Value(5)
-                                    .Item("proto_type").Value("sint64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Fixed64")
-                                    .Item("field_number").Value(6)
-                                    .Item("proto_type").Value("fixed64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("SFixed64")
-                                    .Item("field_number").Value(7)
-                                    .Item("proto_type").Value("sfixed64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Int32")
-                                    .Item("field_number").Value(8)
-                                    .Item("proto_type").Value("int32")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("UInt32")
-                                    .Item("field_number").Value(9)
-                                    .Item("proto_type").Value("uint32")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("SInt32")
-                                    .Item("field_number").Value(10)
-                                    .Item("proto_type").Value("sint32")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Fixed32")
-                                    .Item("field_number").Value(11)
-                                    .Item("proto_type").Value("fixed32")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("SFixed32")
-                                    .Item("field_number").Value(12)
-                                    .Item("proto_type").Value("sfixed32")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Bool")
-                                    .Item("field_number").Value(13)
-                                    .Item("proto_type").Value("bool")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("String")
-                                    .Item("field_number").Value(14)
-                                    .Item("proto_type").Value("string")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Bytes")
-                                    .Item("field_number").Value(15)
-                                    .Item("proto_type").Value("bytes")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Enum")
-                                    .Item("field_number").Value(16)
-                                    .Item("proto_type").Value("enum_string")
-                                    .Item("enumeration_name").Value("EEnum")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("Message")
-                                    .Item("field_number").Value(17)
-                                    .Item("proto_type").Value("message")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("AnyWithMap")
-                                    .Item("field_number").Value(18)
-                                    .Item("proto_type").Value("any")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("AnyWithInt64")
-                                    .Item("field_number").Value(19)
-                                    .Item("proto_type").Value("any")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("AnyWithString")
-                                    .Item("field_number").Value(20)
-                                    .Item("proto_type").Value("any")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("OtherColumns")
-                                    .Item("field_number").Value(21)
-                                    .Item("proto_type").Value("other_columns")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("MissingInt64")
-                                    .Item("field_number").Value(22)
-                                    .Item("proto_type").Value("int64")
-                                .EndMap()
-                            .EndList()
-                        .EndMap()
-                    .EndList()
-                .EndAttributes()
-                .Value("protobuf");
-    }
-    Y_FAIL();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TLenvalEntry // One row produced by TLenvalParser::Next().
-{
- TString RowData; // Raw bytes of the record that followed the length prefix.
- ui32 TableIndex; // Table index in effect when the row was read.
- ui64 TabletIndex; // Tablet index in effect when the row was read.
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Minimal reader of a lenval stream: a sequence of records, each prefixed
-// with a ui32 length, interleaved with control markers.
-//
-// Table index and tablet index markers are tracked and attached to the rows
-// that follow them; key switch, range index and row index markers are not
-// supported and make Next() throw.
-class TLenvalParser
-{
-public:
-    // Reads from |input|, which is not owned by the parser.
-    explicit TLenvalParser(IInputStream* input)
-        : Input_(input)
-    { }
-
-    // Reads from an in-memory buffer; the wrapping stream is owned by the parser.
-    explicit TLenvalParser(TStringBuf input)
-        : StreamHolder_(std::make_unique<TMemoryInput>(input))
-        , Input_(StreamHolder_.get())
-    { }
-
-    // Returns the next row, or std::nullopt when the input is exhausted or
-    // the end-of-stream marker is met (see IsEndOfStream()).
-    // Throws on truncated input and on unsupported markers.
-    std::optional<TLenvalEntry> Next()
-    {
-        // Index markers carry no row of their own, so keep reading until a
-        // row, the end of input or the end-of-stream marker shows up.
-        while (true) {
-            ui32 rowSize;
-            size_t read = Input_->Load(&rowSize, sizeof(rowSize));
-            if (read == 0) {
-                return std::nullopt;
-            } else if (read < sizeof(rowSize)) {
-                THROW_ERROR_EXCEPTION("corrupted lenval: can't read row length");
-            }
-            switch (rowSize) {
-                case LenvalTableIndexMarker: {
-                    ui32 tableIndex;
-                    read = Input_->Load(&tableIndex, sizeof(tableIndex));
-                    if (read != sizeof(tableIndex)) {
-                        THROW_ERROR_EXCEPTION("corrupted lenval: can't read table index");
-                    }
-                    CurrentTableIndex_ = tableIndex;
-                    continue;
-                }
-                case LenvalTabletIndexMarker: {
-                    ui64 tabletIndex;
-                    read = Input_->Load(&tabletIndex, sizeof(tabletIndex));
-                    if (read != sizeof(tabletIndex)) {
-                        THROW_ERROR_EXCEPTION("corrupted lenval: can't read tablet index");
-                    }
-                    CurrentTabletIndex_ = tabletIndex;
-                    continue;
-                }
-                case LenvalEndOfStream:
-                    EndOfStream_ = true;
-                    return std::nullopt;
-                case LenvalKeySwitch:
-                case LenvalRangeIndexMarker:
-                case LenvalRowIndexMarker:
-                    THROW_ERROR_EXCEPTION("marker is unsupported");
-                default: {
-                    TLenvalEntry result;
-                    result.RowData.resize(rowSize);
-                    result.TableIndex = CurrentTableIndex_;
-                    result.TabletIndex = CurrentTabletIndex_;
-                    // A short read means the record is truncated; report it
-                    // instead of returning a partially filled row.
-                    read = Input_->Load(result.RowData.Detach(), rowSize);
-                    if (read != rowSize) {
-                        THROW_ERROR_EXCEPTION("corrupted lenval: can't read row data");
-                    }
-
-                    return result;
-                }
-            }
-        }
-    }
-
-    // True once the end-of-stream marker has been read.
-    bool IsEndOfStream() const
-    {
-        return EndOfStream_;
-    }
-
-private:
-    // Set only by the TStringBuf constructor; keeps the wrapping stream alive.
-    std::unique_ptr<IInputStream> StreamHolder_;
-    IInputStream* Input_;
-    ui32 CurrentTableIndex_ = 0;
-    ui64 CurrentTabletIndex_ = 0;
-    bool EndOfStream_ = false;
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-// Converts |node| to a protobuf format config and validates it by
-// initializing both the parser and the writer format descriptions with it.
-// When |schemas| is empty, one empty schema per configured table is used.
-TProtobufFormatConfigPtr ParseAndValidateConfig(const INodePtr& node, std::vector<TTableSchemaPtr> schemas = {})
-{
-    auto config = ConvertTo<TProtobufFormatConfigPtr>(node);
-
-    if (schemas.empty()) {
-        schemas.assign(config->Tables.size(), New<TTableSchema>());
-    }
-
-    auto parserDescription = New<TProtobufParserFormatDescription>();
-    parserDescription->Init(config, schemas);
-
-    auto writerDescription = New<TProtobufWriterFormatDescription>();
-    writerDescription->Init(config, schemas);
-
-    return config;
-}
-
-} // namespace
-
-// Builds the "protobuf" format node used by the embedded-message tests.
-//
-// For EProtoFormatType::FileDescriptor the config is derived from the
-// descriptor of TEmbeddingMessage; for every other format type the structured
-// description below is returned.
-INodePtr BuildEmbeddedConfig(EComplexTypeMode complexTypeMode, EProtoFormatType formatType)
-{
-    if (formatType == EProtoFormatType::FileDescriptor) {
-        return CreateFileDescriptorConfig<NYT::TEmbeddingMessage>(complexTypeMode);
-    }
-
-    return BuildYsonNodeFluently()
-        .BeginAttributes()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        // First-level embedded message.
-                        .Item().BeginMap()
-                            .Item("name").Value("*")
-                            .Item("field_number").Value(2)
-                            .Item("proto_type").Value("embedded_message")
-                            .Item("fields").BeginList()
-                                .Item().BeginMap()
-                                    .Item("name").Value("other_columns_field")
-                                    .Item("field_number").Value(15)
-                                    .Item("proto_type").Value("other_columns")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("embedded_num")
-                                    .Item("field_number").Value(10)
-                                    .Item("proto_type").Value("uint64")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("embedded_extra_field")
-                                    .Item("field_number").Value(11)
-                                    .Item("proto_type").Value("string")
-                                .EndMap()
-                                .Item().BeginMap()
-                                    .Item("name").Value("variant")
-                                    .Item("proto_type").Value("oneof")
-                                    .Item("fields").BeginList()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("str_variant")
-                                            .Item("field_number").Value(101)
-                                            .Item("proto_type").Value("string")
-                                        .EndMap()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("uint_variant")
-                                            .Item("field_number").Value(102)
-                                            .Item("proto_type").Value("uint64")
-                                        .EndMap()
-                                    .EndList()
-                                .EndMap()
-                                // Second-level embedded message.
-                                .Item().BeginMap()
-                                    .Item("name").Value("*")
-                                    .Item("field_number").Value(1)
-                                    .Item("proto_type").Value("embedded_message")
-                                    .Item("fields").BeginList()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("embedded2_num")
-                                            .Item("field_number").Value(10)
-                                            .Item("proto_type").Value("uint64")
-                                        .EndMap()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("embedded2_struct")
-                                            .Item("field_number").Value(17)
-                                            .Item("proto_type").Value("structured_message")
-                                            .Item("fields").BeginList()
-                                                .Item().BeginMap()
-                                                    .Item("name").Value("float1")
-                                                    .Item("field_number").Value(1)
-                                                    .Item("proto_type").Value("float")
-                                                .EndMap()
-                                                .Item().BeginMap()
-                                                    .Item("name").Value("string1")
-                                                    .Item("field_number").Value(2)
-                                                    .Item("proto_type").Value("string")
-                                                .EndMap()
-                                            .EndList()
-                                        .EndMap()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("embedded2_repeated")
-                                            .Item("field_number").Value(42)
-                                            .Item("proto_type").Value("string")
-                                            .Item("repeated").Value(true)
-                                        .EndMap()
-                                    .EndList()
-                                .EndMap()
-                            .EndList()
-                        .EndMap()
-                        // Plain top-level columns.
-                        .Item().BeginMap()
-                            .Item("name").Value("num")
-                            .Item("field_number").Value(12)
-                            .Item("proto_type").Value("uint64")
-                        .EndMap()
-                        .Item().BeginMap()
-                            .Item("name").Value("extra_field")
-                            .Item("field_number").Value(13)
-                            .Item("proto_type").Value("string")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-            .Item("complex_type_mode").Value(complexTypeMode)
-        .EndAttributes()
-        .Value("protobuf");
-}
-
-// Builds the table schema used together with BuildEmbeddedConfig().
-TTableSchemaPtr BuildEmbeddedSchema()
-{
-    return New<TTableSchema>(std::vector<TColumnSchema>{
-        {"num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
-        {"embedded_num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
-        {"variant", VariantStructLogicalType({
-            {"str_variant", SimpleLogicalType(ESimpleLogicalValueType::String)},
-            {"uint_variant", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
-        })},
-        {"extra_column", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))},
-        {"embedded2_num", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
-        {"embedded2_struct", StructLogicalType({
-            {"float1", SimpleLogicalType(ESimpleLogicalValueType::Float)},
-            {"string1", SimpleLogicalType(ESimpleLogicalValueType::String)},
-        })},
-        {"embedded2_repeated", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
-        {"other_complex_field", StructLogicalType({
-            {"one", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-            {"two", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-            {"three", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-        })},
-        {"extra_int", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
-    });
-}
-
-// The structured embedded config must pass validation against the embedded schema.
-TEST(TProtobufFormat, TestConfigParsingEmbedded)
-{
-    auto embeddedSchema = BuildEmbeddedSchema();
-    auto formatNode = BuildEmbeddedConfig(EComplexTypeMode::Positional, EProtoFormatType::Structured);
-
-    EXPECT_NO_THROW(
-        ParseAndValidateConfig(formatNode->Attributes().ToMap(), {embeddedSchema})
-    );
-}
-
-// Checks config validation: the three well-formed all-fields configs are
-// accepted, and each malformed config is rejected with the expected message.
-TEST(TProtobufFormat, TestConfigParsing)
-{
-    // Empty config.
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(ParseYson("{}")),
-        "one of \"tables\", \"file_descriptor_set\" and \"file_descriptor_set_text\" must be specified");
-
-    // Broken protobuf.
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(ParseYson(R"({file_descriptor_set="dfgxx"; file_indices=[0]; message_indices=[0]})")),
-        "Error parsing \"file_descriptor_set\" in protobuf config");
-
-    // Well-formed configs of every flavour.
-    EXPECT_NO_THROW(ParseAndValidateConfig(
-        CreateAllFieldsConfig(EProtoFormatType::Structured)->Attributes().ToMap()
-    ));
-
-    EXPECT_NO_THROW(ParseAndValidateConfig(
-        CreateAllFieldsConfig(EProtoFormatType::FileDescriptorLegacy)->Attributes().ToMap()
-    ));
-
-    EXPECT_NO_THROW(ParseAndValidateConfig(
-        CreateAllFieldsConfig(EProtoFormatType::FileDescriptor)->Attributes().ToMap()
-    ));
-
-    // embedded_message nested inside structured_message.
-    auto embeddedInsideNonembeddedConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("embedded_message1")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("embedded_message")
-                            .Item("fields").BeginList()
-                                .Item().BeginMap()
-                                    .Item("name").Value("field1")
-                                    .Item("field_number").Value(2)
-                                    .Item("proto_type").Value("structured_message")
-                                    .Item("fields").BeginList()
-                                        .Item().BeginMap()
-                                            .Item("name").Value("embedded_message2")
-                                            .Item("field_number").Value(3)
-                                            .Item("proto_type").Value("embedded_message")
-                                            .Item("fields").BeginList()
-                                                .Item().BeginMap()
-                                                    .Item("name").Value("field2")
-                                                    .Item("field_number").Value(4)
-                                                    .Item("proto_type").Value("string")
-                                                .EndMap()
-                                            .EndList()
-                                        .EndMap()
-                                    .EndList()
-                                .EndMap()
-                            .EndList()
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto schemaForEmbedded = New<TTableSchema>(std::vector{
-        TColumnSchema("field1", StructLogicalType({
-            {"embedded_message2", StructLogicalType({
-                {"field2", SimpleLogicalType(ESimpleLogicalValueType::String)},
-            })},
-        }))
-    });
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(embeddedInsideNonembeddedConfig, {schemaForEmbedded}),
-        "embedded_message inside of structured_message is not allowed");
-
-    // embedded_message marked as repeated.
-    auto repeatedEmbeddedConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("*")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("embedded_message")
-                            .Item("repeated").Value(true)
-                            .Item("fields").BeginList()
-                                .Item().BeginMap()
-                                    .Item("name").Value("field1")
-                                    .Item("field_number").Value(1)
-                                    .Item("proto_type").Value("uint64")
-                                .EndMap()
-                            .EndList()
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(repeatedEmbeddedConfig),
-        R"(type "embedded_message" can not be repeated)");
-
-    // Two "other_columns" fields in one table.
-    auto multipleOtherColumnsConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("Other1")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("other_columns")
-                        .EndMap()
-                        .Item().BeginMap()
-                            .Item("name").Value("Other2")
-                            .Item("field_number").Value(2)
-                            .Item("proto_type").Value("other_columns")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(multipleOtherColumnsConfig),
-        "Multiple \"other_columns\" in protobuf config are not allowed");
-
-    // Two fields mapped to the same column name.
-    auto duplicateColumnNamesConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("int64")
-                        .EndMap()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(2)
-                            .Item("proto_type").Value("string")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(duplicateColumnNamesConfig),
-        "Multiple fields with same column name \"SomeColumn\" are forbidden in protobuf format");
-
-    // "any" field against a struct column.
-    auto anyCorrespondsToStruct = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("any")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto schema = New<TTableSchema>(std::vector{
-        TColumnSchema("SomeColumn", StructLogicalType({})),
-    });
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(anyCorrespondsToStruct, {schema}),
-        "Table schema and protobuf format config mismatch");
-
-    // "bytes" field against a utf8 column.
-    auto configWithBytes = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("bytes")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto schemaWithUtf8 = New<TTableSchema>(std::vector{
-        TColumnSchema("SomeColumn", SimpleLogicalType(ESimpleLogicalValueType::Utf8)),
-    });
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(configWithBytes, {schemaWithUtf8}),
-        "mismatch: expected logical type to be one of");
-
-    // "packed" without "repeated".
-    auto configWithPackedNonRepeated = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("int64")
-                            .Item("packed").Value(true)
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto schemaWithInt64List = New<TTableSchema>(std::vector<TColumnSchema>{
-        {"SomeColumn", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-    });
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(configWithPackedNonRepeated, {schemaWithInt64List}),
-        "Field \"SomeColumn\" is marked \"packed\" but is not marked \"repeated\"");
-
-    // "packed" on a repeated string field.
-    auto configWithPackedRepeatedString = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("string")
-                            .Item("packed").Value(true)
-                            .Item("repeated").Value(true)
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto schemaWithStringList = New<TTableSchema>(std::vector{
-        TColumnSchema("SomeColumn", ListLogicalType(
-            SimpleLogicalType(ESimpleLogicalValueType::String)))
-    });
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(configWithPackedRepeatedString, {schemaWithStringList}),
-        "packed protobuf field must have primitive numeric type, got \"string\"");
-
-    // Column without "field_number".
-    auto configWithMissingFieldNumber = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("SomeColumn")
-                            .Item("proto_type").Value("string")
-                            .Item("repeated").Value(true)
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        ParseAndValidateConfig(configWithMissingFieldNumber, {schemaWithStringList}),
-        "\"field_number\" is required");
-}
-
-// The minimal i32 value must survive parsing into the "Int32" column.
-TEST(TProtobufFormat, TestParseBigZigZag)
-{
-    constexpr i32 expected = Min<i32>();
-
-    TMessage message;
-    message.set_int32_field(expected);
-
-    auto formatNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
-    auto config = ConvertTo<TProtobufFormatConfigPtr>(formatNode->Attributes().ToMap());
-
-    auto rowCollector = ParseRows(message, config);
-    EXPECT_EQ(GetInt64(rowCollector.GetRowValue(0, "Int32")), expected);
-}
-
-// An "enum_string" column must be parsed into the name of the enum value.
-TEST(TProtobufFormat, TestParseEnumerationString)
-{
-    auto config = ConvertTo<TProtobufFormatConfigPtr>(CreateAllFieldsConfig(EProtoFormatType::Structured)->Attributes().ToMap());
-
-    const std::vector<std::pair<EEnum, TString>> cases = {
-        {EEnum::One, "One"},
-        {EEnum::Two, "Two"},
-        {EEnum::Three, "Three"},
-        {EEnum::MinusFortyTwo, "MinusFortyTwo"},
-    };
-
-    for (const auto& [enumValue, expectedName] : cases) {
-        TMessage message;
-        message.set_enum_field(enumValue);
-        auto rowCollector = ParseRows(message, config);
-        EXPECT_EQ(GetString(rowCollector.GetRowValue(0, "Enum")), expectedName);
-    }
-}
-
-// A numeric enum value missing from the "enumerations" config must make parsing fail.
-TEST(TProtobufFormat, TestParseWrongEnumeration)
-{
-    auto formatNode = CreateAllFieldsConfig(EProtoFormatType::Structured);
-    auto config = ConvertTo<TProtobufFormatConfigPtr>(formatNode->Attributes().ToMap());
-
-    // Put the value into the unknown fields under the enum field number.
-    TMessage message;
-    const auto enumFieldNumber = TMessage::descriptor()->FindFieldByName("enum_field")->number();
-    message.mutable_unknown_fields()->AddVarint(enumFieldNumber, 30);
-
-    EXPECT_ANY_THROW(ParseRows(message, config));
-}
-
-// An "enum_int" column must be parsed into the numeric value of the enum,
-// including a value that has no name in EEnum.
-TEST(TProtobufFormat, TestParseEnumerationInt)
-{
-    TCollectingValueConsumer rowCollector;
-
-    auto config = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("tables").BeginList()
-                .Item().BeginMap()
-                    .Item("columns").BeginList()
-                        .Item().BeginMap()
-                            .Item("name").Value("Enum")
-                            .Item("field_number").Value(16)
-                            .Item("proto_type").Value("enum_int")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndMap();
-
-    auto parser = CreateParserForProtobuf(&rowCollector, ConvertTo<TProtobufFormatConfigPtr>(config), 0);
-
-    // Feeds one message with the given enum value to the parser.
-    auto feedEnumValue = [&] (EEnum value) {
-        TMessage message;
-        message.set_enum_field(value);
-        parser->Read(LenvalBytes(message));
-    };
-
-    feedEnumValue(EEnum::One);
-    feedEnumValue(EEnum::Two);
-    feedEnumValue(EEnum::Three);
-    feedEnumValue(EEnum::MinusFortyTwo);
-
-    {
-        // This value is written through the unknown fields under the enum field number.
-        TMessage message;
-        const auto enumFieldNumber = TMessage::descriptor()->FindFieldByName("enum_field")->number();
-        message.mutable_unknown_fields()->AddVarint(enumFieldNumber, 100500);
-        parser->Read(LenvalBytes(message));
-    }
-
-    parser->Finish();
-
-    const std::vector<i64> expectedValues = {1, 2, 3, -42, 100500};
-    for (int rowIndex = 0; rowIndex < static_cast<int>(expectedValues.size()); ++rowIndex) {
-        EXPECT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Enum")), expectedValues[rowIndex]);
-    }
-}
-
-// Feeds random lenval records to the parser. Exceptions are deliberately
-// swallowed: the only requirement is that parsing never crashes.
-TEST(TProtobufFormat, TestParseRandomGarbage)
-{
-    constexpr int AttemptCount = 1000;
-    constexpr ui32 PayloadSize = 8;
-
-    TFastRng64 rng(42);
-    for (int attempt = 0; attempt != AttemptCount; ++attempt) {
-        auto garbage = GenerateRandomLenvalString(rng, PayloadSize);
-
-        TCollectingValueConsumer rowCollector;
-        auto parser = CreateParserForProtobuf(
-            &rowCollector,
-            ConvertTo<TProtobufFormatConfigPtr>(CreateAllFieldsConfig(EProtoFormatType::Structured)->Attributes().ToMap()),
-            0);
-        try {
-            parser->Read(garbage);
-            parser->Finish();
-        } catch (...) {
-            // Malformed input is allowed to throw.
-        }
-    }
-}
-
-TEST(TProtobufFormat, TestParseZeroColumns) // Parser configured with an empty column list; two four-zero-byte reads are expected to yield two rows without values.
-{
- auto config = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList() // No columns are described.
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- TCollectingValueConsumer rowCollector;
- auto parser = CreateParserForProtobuf(
- &rowCollector,
- ConvertTo<TProtobufFormatConfigPtr>(config),
- 0);
-
- // Empty lenval values.
- parser->Read("\0\0\0\0"sv); // The sv literal keeps all four NUL bytes in the view.
- parser->Read("\0\0\0\0"sv);
-
- parser->Finish();
-
- ASSERT_EQ(static_cast<ssize_t>(rowCollector.Size()), 2); // One collected row per read.
- EXPECT_EQ(static_cast<int>(rowCollector.GetRow(0).GetCount()), 0);
- EXPECT_EQ(static_cast<int>(rowCollector.GetRow(1).GetCount()), 0);
-}
-
-TEST(TProtobufFormat, TestWriteEnumerationString) // Writes two rows whose "Enum" column holds enumerator names as strings, then re-reads the output and checks enum_field.
-{
- auto config = CreateAllFieldsConfig(EProtoFormatType::Structured);
-
- auto nameTable = New<TNameTable>();
-
- TString result; // Receives everything the writer emits through resultStream.
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- config->Attributes(),
- {New<TTableSchema>()},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
-
- writer->Write({
- MakeRow(nameTable, {
- {"Enum", "MinusFortyTwo"}
- }).Get()
- });
- writer->Write({
- MakeRow(nameTable, {
- {"Enum", "Three"},
- }).Get()
- });
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput si(result); // Re-read the written bytes record by record.
- TLenvalParser parser(&si);
- {
- auto row = parser.Next();
- ASSERT_TRUE(row);
- NYT::TMessage message;
- ASSERT_TRUE(message.ParseFromString(row->RowData));
- ASSERT_EQ(message.enum_field(), NYT::EEnum::MinusFortyTwo);
- }
- {
- auto row = parser.Next();
- ASSERT_TRUE(row);
- NYT::TMessage message;
- ASSERT_TRUE(message.ParseFromString(row->RowData));
- ASSERT_EQ(message.enum_field(), NYT::EEnum::Three);
- }
- { // A third Next() must be falsy: only two rows were written.
- auto row = parser.Next();
- ASSERT_FALSE(row);
- }
-}
-
-TEST(TProtobufFormat, TestWriteEnumerationInt) // Writes integers into an "enum_int" column and checks enum_field; values outside the i32 range are expected to throw TErrorException.
-{
- auto config = BuildYsonNodeFluently()
- .BeginAttributes()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("Enum")
- .Item("field_number").Value(16)
- .Item("proto_type").Value("enum_int")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndAttributes()
- .Value("protobuf");
-
- auto nameTable = New<TNameTable>();
-
- auto writeAndParseRow = [&] (TUnversionedRow row, TMessage* message) { // Helper: writes one row with a fresh writer, then parses exactly one record into *message.
- TString result;
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- config->Attributes(),
- {New<TTableSchema>()},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
- writer->Write({row});
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput si(result);
- TLenvalParser parser(&si);
- auto protoRow = parser.Next();
- ASSERT_TRUE(protoRow); // NOTE(review): ASSERT_* here returns only from the lambda; the test body continues after a failure.
-
- ASSERT_TRUE(message->ParseFromString(protoRow->RowData));
-
- auto nextProtoRow = parser.Next();
- ASSERT_FALSE(nextProtoRow); // Exactly one record per call is expected.
- };
-
- {
- TMessage message;
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", -42},
- }).Get(),
- &message);
- ASSERT_EQ(message.enum_field(), EEnum::MinusFortyTwo);
- }
- {
- TMessage message;
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", static_cast<ui64>(std::numeric_limits<i32>::max())}, // i32 max passed as ui64.
- }).Get(),
- &message);
- ASSERT_EQ(message.enum_field(), EEnum::MaxInt32);
- }
- {
- TMessage message;
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", std::numeric_limits<i32>::max()}, // Same value without the ui64 cast.
- }).Get(),
- &message);
- ASSERT_EQ(message.enum_field(), EEnum::MaxInt32);
- }
- {
- TMessage message;
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", std::numeric_limits<i32>::min()},
- }).Get(),
- &message);
- ASSERT_EQ(message.enum_field(), EEnum::MinInt32);
- }
-
- TMessage message; // Shared by the three out-of-range cases below.
- ASSERT_THROW(
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", static_cast<i64>(std::numeric_limits<i32>::max()) + 1}, // One above i32 max, computed in i64.
- }).Get(),
- &message),
- TErrorException);
-
- ASSERT_THROW(
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", static_cast<i64>(std::numeric_limits<i32>::min()) - 1}, // One below i32 min, computed in i64.
- }).Get(),
- &message),
- TErrorException);
-
- ASSERT_THROW(
- writeAndParseRow(
- MakeRow(nameTable, {
- {"Enum", static_cast<ui64>(std::numeric_limits<i32>::max()) + 1}, // One above i32 max, computed in ui64.
- }).Get(),
- &message),
- TErrorException);
-}
-
-
-TEST(TProtobufFormat, TestWriteZeroColumns) // With an empty column list, the two written rows are expected to produce exactly eight zero bytes.
-{
- auto config = BuildYsonNodeFluently()
- .BeginAttributes()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList() // No columns are described.
- .EndList()
- .EndMap()
- .EndList()
- .EndAttributes()
- .Value("protobuf");
-
- auto nameTable = New<TNameTable>();
-
- TString result;
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- config->Attributes(),
- {New<TTableSchema>()},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
-
- writer->Write({ // First row carries two values whose columns are absent from the config.
- MakeRow(nameTable, {
- {"Int64", -1},
- {"String", "this_is_string"},
- }).Get()
- });
- writer->Write({MakeRow(nameTable, { }).Get()}); // Second row has no values at all.
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- ASSERT_EQ(result, "\0\0\0\0\0\0\0\0"sv); // The sv literal keeps all eight NUL bytes in the comparison.
-}
-
-TEST(TProtobufFormat, TestTabletIndex) // Sets EnableTabletIndex and checks that TLenvalParser reports the TabletIndex written with each row.
-{
- auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("int64_field")
- .Item("field_number").Value(3)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap());
-
- auto nameTable = New<TNameTable>();
-
- TString result;
- TStringOutput resultStream(result);
- auto controlAttributesConfig = New<TControlAttributesConfig>();
- controlAttributesConfig->EnableTabletIndex = true; // The only control attribute changed from the default-constructed config.
-
- auto writer = CreateWriterForProtobuf(
- config,
- {New<TTableSchema>()},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- controlAttributesConfig,
- 0);
-
- writer->Write({ // Both rows go out in a single Write call.
- MakeRow(nameTable, {
- {TabletIndexColumnName, 1LL << 50}, // A value that does not fit in 32 bits.
- {"int64_field", -2345},
- }).Get(),
- MakeRow(nameTable, {
- {TabletIndexColumnName, 12},
- {"int64_field", 2345},
- }).Get(),
- });
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput si(result);
- TLenvalParser parser(&si);
- {
- auto row = parser.Next();
- ASSERT_TRUE(row);
- ASSERT_EQ(row->TabletIndex, 1ULL << 50);
- NYT::TMessage message;
- ASSERT_TRUE(message.ParseFromString(row->RowData));
- ASSERT_EQ(message.int64_field(), -2345);
- }
- {
- auto row = parser.Next();
- ASSERT_TRUE(row);
- ASSERT_EQ(static_cast<int>(row->TabletIndex), 12);
- NYT::TMessage message;
- ASSERT_TRUE(message.ParseFromString(row->RowData));
- ASSERT_EQ(message.int64_field(), 2345);
- }
- { // A third Next() must be falsy: only two rows were written.
- auto row = parser.Next();
- ASSERT_FALSE(row);
- }
-}
-
-TEST(TProtobufFormat, TestContext) // Expects parsing to throw TErrorException whose "context" attribute contains the string that was in the message.
-{
- auto config = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList() // No columns are described.
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- TCollectingValueConsumer rowCollector;
- auto parser = CreateParserForProtobuf(
- &rowCollector,
- ConvertTo<TProtobufFormatConfigPtr>(config),
- 0);
-
- TString context;
- try {
- TMessage message;
- message.set_string_field("PYSHCH-PYSHCH"); // Marker string searched for in the error context below.
- parser->Read(LenvalBytes(message));
- parser->Finish();
- GTEST_FATAL_FAILURE_("expected to throw"); // Reached only if neither Read nor Finish threw.
- } catch (const NYT::TErrorException& e) {
- context = *e.Error().Attributes().Find<TString>("context"); // NOTE(review): the Find result is dereferenced without a check; confirm "context" is always present.
- }
- ASSERT_NE(context.find("PYSHCH-PYSHCH"), TString::npos);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TTableSchemaPtr CreateSchemaWithStructuredMessage() // Builds a table schema with struct, list, variant, dict and scalar columns; takes no arguments and returns a new TTableSchema.
-{
- auto keyValueStruct = StructLogicalType({ // Struct of two optional string members, reused by several columns below.
- {"key", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"value", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- });
-
- return New<TTableSchema>(std::vector<TColumnSchema>{
- {"first", StructLogicalType({ // Struct column; its member list is bracketed by two "field_missing_from_proto*" members.
- {"field_missing_from_proto1", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int32))},
- {"enum_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"another_repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"message_field", keyValueStruct},
- {"repeated_message_field", ListLogicalType(keyValueStruct)},
- {"any_int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"any_map_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Any))},
- {"optional_int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"repeated_optional_any_field", ListLogicalType(OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Any)))},
- {"packed_repeated_enum_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"optional_repeated_bool_field", OptionalLogicalType(ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean)))},
- {"oneof_field", VariantStructLogicalType({
- {"oneof_string_field_1", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_message_field", keyValueStruct},
- })},
- {"optional_oneof_field", OptionalLogicalType(VariantStructLogicalType({
- {"oneof_string_field_1", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_message_field", keyValueStruct},
- }))},
- {"map_field", DictLogicalType(
- SimpleLogicalType(ESimpleLogicalValueType::Int64),
- OptionalLogicalType(keyValueStruct))
- },
- {"field_missing_from_proto2", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int32))},
- })},
- {"repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))}, // Top-level columns start here; several reuse names of members of "first".
- {"another_repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"repeated_message_field", ListLogicalType(keyValueStruct)},
- {"second", StructLogicalType({
- {"one", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"two", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"three", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- })},
- {"any_field", SimpleLogicalType(ESimpleLogicalValueType::Any)},
-
- {"int64_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"uint64_field", SimpleLogicalType(ESimpleLogicalValueType::Uint64)},
- {"int32_field", SimpleLogicalType(ESimpleLogicalValueType::Int32)},
- {"uint32_field", SimpleLogicalType(ESimpleLogicalValueType::Uint32)},
-
- {"enum_int_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"enum_string_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"enum_string_int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-
- {"repeated_optional_any_field", ListLogicalType(OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Any)))},
-
- {"other_complex_field", StructLogicalType({ // Same member names and types as "second".
- {"one", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"two", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- {"three", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- })},
-
- {"utf8_field", SimpleLogicalType(ESimpleLogicalValueType::Utf8)},
-
- {"packed_repeated_int64_field", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
-
- {"optional_repeated_int64_field", OptionalLogicalType(ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64)))},
-
- {"oneof_field", VariantStructLogicalType({
- {"oneof_string_field_1", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_message_field", keyValueStruct},
- })},
-
- {"optional_oneof_field", OptionalLogicalType(VariantStructLogicalType({
- {"oneof_string_field_1", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_string_field", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"oneof_message_field", keyValueStruct},
- }))},
-
- {"map_field", DictLogicalType(
- SimpleLogicalType(ESimpleLogicalValueType::Int64),
- OptionalLogicalType(keyValueStruct))
- },
- });
-}
-
-INodePtr CreateConfigWithStructuredMessage(EComplexTypeMode complexTypeMode, EProtoFormatType formatType) // Returns the format config node: delegates to CreateFileDescriptorConfig for FileDescriptor, otherwise describes every column by hand.
-{
- if (formatType == EProtoFormatType::FileDescriptor) {
- return CreateFileDescriptorConfig<TMessageWithStructuredEmbedded>(complexTypeMode);
- }
- YT_VERIFY(formatType == EProtoFormatType::Structured); // Only FileDescriptor and Structured are handled by this function.
-
- auto buildOneofConfig = [] (TString prefix, int fieldNumberOffset) { // Builds a "oneof" description; prefix is prepended to names, fieldNumberOffset is added to the three alternatives' field numbers.
- return BuildYsonNodeFluently()
- .BeginMap()
- .Item("name").Value(prefix + "oneof_field")
- .Item("proto_type").Value("oneof")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value(prefix + "oneof_string_field_1")
- .Item("field_number").Value(101 + fieldNumberOffset)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value(prefix + "oneof_string_field")
- .Item("field_number").Value(102 + fieldNumberOffset)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value(prefix + "oneof_message_field")
- .Item("field_number").Value(1000 + fieldNumberOffset)
- .Item("proto_type").Value("structured_message")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("key")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("value")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
- };
- auto oneofConfig = buildOneofConfig("", 0); // Field numbers 101, 102, 1000.
- auto optionalOneofConfig = buildOneofConfig("optional_", 1000); // Field numbers 1101, 1102, 2000.
-
- auto keyValueFields = BuildYsonStringFluently() // Shared "fields" list for key/value string messages, inserted below via Value(keyValueFields).
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("key")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("value")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList();
-
- return BuildYsonNodeFluently()
- .BeginAttributes()
- .Item("enumerations").Value(EnumerationsConfig)
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("first") // Nested message column; its members are listed under "fields".
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("int64_field")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("enum_field")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("enum_string")
- .Item("enumeration_name").Value("EEnum")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("packed_repeated_enum_field")
- .Item("field_number").Value(11)
- .Item("proto_type").Value("enum_string")
- .Item("enumeration_name").Value("EEnum")
- .Item("repeated").Value(true)
- .Item("packed").Value(true)
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("message_field")
- .Item("field_number").Value(4)
- .Item("proto_type").Value("structured_message")
- .Item("fields").Value(keyValueFields)
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("repeated_int64_field")
- .Item("field_number").Value(3)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("another_repeated_int64_field")
- .Item("field_number").Value(9)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("repeated_message_field")
- .Item("field_number").Value(5)
- .Item("proto_type").Value("structured_message")
- .Item("repeated").Value(true)
- .Item("fields").Value(keyValueFields)
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("any_int64_field")
- .Item("field_number").Value(6)
- .Item("proto_type").Value("any")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("any_map_field")
- .Item("field_number").Value(7)
- .Item("proto_type").Value("any")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("optional_int64_field")
- .Item("field_number").Value(8)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("repeated_optional_any_field")
- .Item("field_number").Value(10)
- .Item("proto_type").Value("any")
- .Item("repeated").Value(true)
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("optional_repeated_bool_field")
- .Item("field_number").Value(12)
- .Item("proto_type").Value("bool")
- .Item("repeated").Value(true)
- .EndMap()
- .Item().Value(oneofConfig)
- .Item().Value(optionalOneofConfig)
- .Item()
- .BeginMap()
- .Item("name").Value("map_field") // Described as a repeated message with "key" and "value" members.
- .Item("field_number").Value(13)
- .Item("proto_type").Value("structured_message")
- .Item("repeated").Value(true)
- .Item("fields")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("key")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("value")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("structured_message")
- .Item("fields").Value(keyValueFields)
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("second")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("one")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("two")
- .Item("field_number").Value(500000000)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("three")
- .Item("field_number").Value(100500)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("repeated_message_field") // Spells out the same key/value members that keyValueFields holds.
- .Item("field_number").Value(3)
- .Item("proto_type").Value("structured_message")
- .Item("repeated").Value(true)
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("key")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("value")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("repeated_int64_field")
- .Item("field_number").Value(4)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("another_repeated_int64_field")
- .Item("field_number").Value(13)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .EndMap()
- .Item()
- .BeginMap()
- // In schema it is of type "any".
- .Item("name").Value("any_field")
- .Item("field_number").Value(5)
- .Item("proto_type").Value("int64")
- .EndMap()
- // The next fields are for type casting testing
- .Item()
- .BeginMap()
- // In schema it is of type "int64".
- .Item("name").Value("int64_field")
- .Item("field_number").Value(6)
- .Item("proto_type").Value("int32")
- .EndMap()
- .Item()
- .BeginMap()
- // In schema it is of type "uint64".
- .Item("name").Value("uint64_field")
- .Item("field_number").Value(7)
- .Item("proto_type").Value("uint32")
- .EndMap()
- .Item()
- .BeginMap()
- // In schema it is of type "int32".
- .Item("name").Value("int32_field")
- .Item("field_number").Value(8)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item()
- .BeginMap()
- // In schema it is of type "uint32".
- .Item("name").Value("uint32_field")
- .Item("field_number").Value(9)
- .Item("proto_type").Value("uint64")
- .EndMap()
-
- // Enums.
- .Item()
- .BeginMap()
- .Item("name").Value("enum_int_field")
- .Item("field_number").Value(10)
- .Item("proto_type").Value("enum_int")
- .Item("enumeration_name").Value("EEnum")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("enum_string_string_field")
- .Item("field_number").Value(11)
- .Item("proto_type").Value("enum_string")
- .Item("enumeration_name").Value("EEnum")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("enum_string_int64_field")
- .Item("field_number").Value(12)
- .Item("proto_type").Value("enum_string")
- .Item("enumeration_name").Value("EEnum")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("utf8_field")
- .Item("field_number").Value(16)
- .Item("proto_type").Value("string")
- .EndMap()
-
- // list<optional<any>>.
- .Item()
- .BeginMap()
- .Item("name").Value("repeated_optional_any_field")
- .Item("field_number").Value(14)
- .Item("proto_type").Value("any")
- .Item("repeated").Value(true)
- .EndMap()
-
- // Other columns.
- .Item()
- .BeginMap()
- .Item("name").Value("other_columns_field")
- .Item("field_number").Value(15)
- .Item("proto_type").Value("other_columns")
- .EndMap()
-
- .Item()
- .BeginMap()
- .Item("name").Value("packed_repeated_int64_field")
- .Item("field_number").Value(17)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .Item("packed").Value(true)
- .EndMap()
-
- .Item()
- .BeginMap()
- .Item("name").Value("optional_repeated_int64_field")
- .Item("field_number").Value(18)
- .Item("proto_type").Value("int64")
- .Item("repeated").Value(true)
- .EndMap()
-
- .Item().Value(oneofConfig) // The same two oneof descriptions are reused at the top level.
- .Item().Value(optionalOneofConfig)
-
- .Item()
- .BeginMap()
- .Item("name").Value("map_field")
- .Item("field_number").Value(19)
- .Item("proto_type").Value("structured_message")
- .Item("repeated").Value(true)
- .Item("fields")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("key")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("name").Value("value")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("structured_message")
- .Item("fields").Value(keyValueFields)
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .Item("complex_type_mode").Value(complexTypeMode) // The caller-supplied mode is stored as an attribute next to "tables".
- .EndAttributes()
- .Value("protobuf");
-}
-
-using TProtobufFormatStructuredMessageParameter = std::tuple<EComplexTypeMode, int, EProtoFormatType>; // Unpacked by the TEST_P bodies as (complexTypeMode, rowCount, protoFormatType).
-
-class TProtobufFormatStructuredMessage // Value-parameterized fixture with no members of its own.
- : public ::testing::TestWithParam<TProtobufFormatStructuredMessageParameter>
-{ };
-
-INSTANTIATE_TEST_SUITE_P( // Positional mode, one row, FileDescriptor format.
- FileDescriptor,
- TProtobufFormatStructuredMessage,
- ::testing::Values(TProtobufFormatStructuredMessageParameter{
- EComplexTypeMode::Positional,
- 1,
- EProtoFormatType::FileDescriptor}));
-
-INSTANTIATE_TEST_SUITE_P( // Positional mode, one row, Structured format.
- Positional,
- TProtobufFormatStructuredMessage,
- ::testing::Values(TProtobufFormatStructuredMessageParameter{
- EComplexTypeMode::Positional,
- 1,
- EProtoFormatType::Structured}));
-
-INSTANTIATE_TEST_SUITE_P( // Named mode, one row, Structured format.
- Named,
- TProtobufFormatStructuredMessage,
- ::testing::Values(TProtobufFormatStructuredMessageParameter{
- EComplexTypeMode::Named,
- 1,
- EProtoFormatType::Structured}));
-
-INSTANTIATE_TEST_SUITE_P( // Named mode, 30000 rows, Structured format.
- ManyRows,
- TProtobufFormatStructuredMessage,
- ::testing::Values(TProtobufFormatStructuredMessageParameter{
- EComplexTypeMode::Named,
- 30000,
- EProtoFormatType::Structured}));
-
-TEST_P(TProtobufFormatStructuredMessage, EmbeddedWrite) // Writes rowCount copies of one row using BuildEmbeddedSchema/BuildEmbeddedConfig and checks every parsed TEmbeddingMessage.
-{
- auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
-
- auto nameTable = New<TNameTable>();
- auto numId = nameTable->RegisterName("num");
- auto embeddedNumId = nameTable->RegisterName("embedded_num");
- auto variantId = nameTable->RegisterName("variant");
- auto embedded2NumId = nameTable->RegisterName("embedded2_num");
- auto embedded2StructId = nameTable->RegisterName("embedded2_struct");
- auto embedded2RepeatedId = nameTable->RegisterName("embedded2_repeated");
- auto extraIntId = nameTable->RegisterName("extra_int");
- auto otherComplexFieldId = nameTable->RegisterName("other_complex_field");
-
- //message T2 {
- // optional ui64 embedded2_num;
- //};
- //message T1 {
- // required T2 t2 [embedded];
- // optional ui64 embedded_num;
- //};
- //
- //message T {
- // required T1 t1 [embedded];
- // optional ui64 num;
- //};
-
- auto schema = BuildEmbeddedSchema();
- auto config = BuildEmbeddedConfig(complexTypeMode, protoFormatType);
-
- TString result;
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- ConvertTo<TProtobufFormatConfigPtr>(config->Attributes()),
- {schema},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
-
- TUnversionedRowBuilder builder; // All values are added flat to one row; the assertions below read them back through t1() and t1().t2().
- builder.AddValue(MakeUnversionedUint64Value(789, numId));
- builder.AddValue(MakeUnversionedUint64Value(123, embeddedNumId));
- builder.AddValue(MakeUnversionedUint64Value(456, embedded2NumId));
- builder.AddValue(MakeUnversionedCompositeValue("[1; 555u]", variantId)); // Checked below as uint_variant == 555 with has_str_variant false.
- auto embeddedYson = BuildYsonStringFluently()
- .BeginList()
- // float1
- .Item().Value(1.5f)
- // string1
- .Item().Value("abc")
- .EndList();
- auto embeddedYsonStr = embeddedYson.ToString();
- builder.AddValue(MakeUnversionedCompositeValue(embeddedYsonStr, embedded2StructId));
- auto repeatedYsonStr = BuildYsonStringFluently()
- .BeginList()
- .Item().Value("a")
- .Item().Value("b")
- .EndList()
- .ToString();
- builder.AddValue(MakeUnversionedCompositeValue(repeatedYsonStr, embedded2RepeatedId));
- builder.AddValue(MakeUnversionedInt64Value(111, extraIntId));
- auto otherComplexFieldYson = BuildYsonStringFluently()
- .BeginList()
- .Item().Value(22)
- .Item().Value(23)
- .Item().Value(24)
- .EndList();
- auto otherComplexFieldYsonStr = otherComplexFieldYson.ToString();
- builder.AddValue(MakeUnversionedCompositeValue(otherComplexFieldYsonStr, otherComplexFieldId));
-
-
- auto rows = std::vector<TUnversionedRow>(rowCount, builder.GetRow()); // The same row repeated rowCount times.
- writer->Write(rows);
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput input(result);
- TLenvalParser lenvalParser(&input);
-
- for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) { // Every written row must parse to the same message contents.
- auto entry = lenvalParser.Next();
- ASSERT_TRUE(entry);
-
- NYT::TEmbeddingMessage message;
- ASSERT_TRUE(message.ParseFromString(entry->RowData));
-
- EXPECT_EQ(message.num(), 789UL);
- EXPECT_EQ(message.t1().embedded_num(), 123UL);
- EXPECT_EQ(message.t1().t2().embedded2_num(), 456UL);
-
- EXPECT_FALSE(message.t1().has_str_variant());
- EXPECT_TRUE(message.t1().has_uint_variant());
- EXPECT_EQ(message.t1().uint_variant(), 555UL);
-
- EXPECT_EQ(message.t1().t2().embedded2_struct().float1(), 1.5f);
- EXPECT_EQ(message.t1().t2().embedded2_struct().string1(), "abc");
-
- ASSERT_EQ(message.t1().t2().embedded2_repeated_size(), 2);
- EXPECT_EQ(message.t1().t2().embedded2_repeated(0), "a");
- EXPECT_EQ(message.t1().t2().embedded2_repeated(1), "b");
-
- { // "other_complex_field" and "extra_int" are expected inside other_columns_field.
- auto otherColumns = ConvertToNode(TYsonString(message.other_columns_field()))->AsMap();
- auto mode = complexTypeMode;
- auto expected = ([&] { // Expected form of other_complex_field depends on the complex type mode.
- switch (mode) {
- case EComplexTypeMode::Named:
- return BuildYsonNodeFluently()
- .BeginMap()
- .Item("one").Value(22)
- .Item("two").Value(23)
- .Item("three").Value(24)
- .EndMap();
- case EComplexTypeMode::Positional:
- return ConvertToNode(otherComplexFieldYson);
- }
- YT_ABORT(); // Reached only when mode matches neither case above.
- })();
-
- EXPECT_NODES_EQUAL(expected, otherColumns->GetChildOrThrow("other_complex_field"));
- EXPECT_EQ(ConvertTo<i64>(otherColumns->GetChildOrThrow("extra_int")), 111);
- }
-
- ASSERT_FALSE(message.has_extra_field());
- ASSERT_FALSE(message.t1().has_embedded_extra_field());
- }
-
- ASSERT_FALSE(lenvalParser.Next()); // No records beyond rowCount.
-}
-
-TEST_P(TProtobufFormatStructuredMessage, Write)
-{
- auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
-
- auto nameTable = New<TNameTable>();
- auto firstId = nameTable->RegisterName("first");
- auto secondId = nameTable->RegisterName("second");
- auto repeatedMessageId = nameTable->RegisterName("repeated_message_field");
- auto repeatedInt64Id = nameTable->RegisterName("repeated_int64_field");
- auto anotherRepeatedInt64Id = nameTable->RegisterName("another_repeated_int64_field");
- auto anyFieldId = nameTable->RegisterName("any_field");
- auto int64FieldId = nameTable->RegisterName("int64_field");
- auto uint64FieldId = nameTable->RegisterName("uint64_field");
- auto int32FieldId = nameTable->RegisterName("int32_field");
- auto uint32FieldId = nameTable->RegisterName("uint32_field");
- auto enumIntFieldId = nameTable->RegisterName("enum_int_field");
- auto enumStringStringFieldId = nameTable->RegisterName("enum_string_string_field");
- auto enumStringInt64FieldId = nameTable->RegisterName("enum_string_int64_field");
- auto utf8FieldId = nameTable->RegisterName("utf8_field");
- auto repeatedOptionalAnyFieldId = nameTable->RegisterName("repeated_optional_any_field");
- auto otherComplexFieldId = nameTable->RegisterName("other_complex_field");
- auto packedRepeatedInt64FieldId = nameTable->RegisterName("packed_repeated_int64_field");
- auto optionalRepeatedInt64FieldId = nameTable->RegisterName("optional_repeated_int64_field");
- auto oneofFieldId = nameTable->RegisterName("oneof_field");
- auto optionalOneofFieldId = nameTable->RegisterName("optional_oneof_field");
- auto mapFieldId = nameTable->RegisterName("map_field");
-
- auto schema = CreateSchemaWithStructuredMessage();
- auto config = CreateConfigWithStructuredMessage(complexTypeMode, protoFormatType);
-
- TString result;
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- ConvertTo<TProtobufFormatConfigPtr>(config->Attributes()),
- {schema},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
-
- auto firstYsonStr = BuildYsonStringFluently()
- .BeginList()
- // field_missing_from_proto1
- .Item().Value(11111)
- // enum_field
- .Item().Value("Two")
- // int64_field
- .Item().Value(44)
- // repeated_int64_field
- .Item()
- .BeginList()
- .Item().Value(55)
- .Item().Value(56)
- .Item().Value(57)
- .EndList()
- // another_repeated_int64_field
- .Item()
- .BeginList()
- .EndList()
- // message_field
- .Item()
- .BeginList()
- .Item().Value("key")
- .Item().Value("value")
- .EndList()
- // repeated_message_field
- .Item()
- .BeginList()
- .Item()
- .BeginList()
- .Item().Value("key1")
- .Item().Value("value1")
- .EndList()
- .Item()
- .BeginList()
- .Item().Value("key2")
- .Item().Value("value2")
- .EndList()
- .EndList()
- // any_int64_field
- .Item().Value(45)
- // any_map_field
- .Item()
- .BeginMap()
- .Item("key").Value("value")
- .EndMap()
- // optional_int64_field
- .Item().Entity()
- // repeated_optional_any_field
- .Item()
- .BeginList()
- .Item().Value(2)
- .Item().Entity()
- .Item().Value("foo")
- .EndList()
- // packed_repeated_enum_field
- .Item()
- .BeginList()
- .Item().Value("MinusFortyTwo")
- .Item().Value("Two")
- .EndList()
- // optional_repeated_bool_field
- .Item()
- .BeginList()
- .Item().Value(false)
- .Item().Value(true)
- .Item().Value(false)
- .EndList()
- // oneof_field
- .Item()
- .BeginList()
- // message_field
- .Item().Value(2)
- .Item().BeginList()
- .Item().Value("foo")
- .Item().Entity()
- .EndList()
- .EndList()
- // optional_oneof_field
- .Item()
- .Entity()
- // map_field
- .Item()
- .BeginList()
- .Item().BeginList()
- .Item().Value(13)
- .Item().BeginList()
- .Item().Value("bac")
- .Item().Value("cab")
- .EndList()
- .EndList()
- .Item().BeginList()
- .Item().Value(15)
- .Item().BeginList()
- .Item().Value("ya")
- .Item().Value("make")
- .EndList()
- .EndList()
- .EndList()
- .EndList()
- .ToString();
-
- auto secondYsonStr = BuildYsonStringFluently()
- .BeginList()
- .Item().Value(101)
- .Item().Value(102)
- .Item().Value(103)
- .EndList()
- .ToString();
-
- auto repeatedMessageYsonStr = BuildYsonStringFluently()
- .BeginList()
- .Item()
- .BeginList()
- .Item().Value("key11")
- .Item().Value("value11")
- .EndList()
- .Item()
- .BeginList()
- .Item().Value("key21")
- .Item().Value("value21")
- .EndList()
- .EndList()
- .ToString();
-
- auto repeatedInt64Yson = BuildYsonStringFluently()
- .BeginList()
- .Item().Value(31)
- .Item().Value(32)
- .Item().Value(33)
- .EndList();
- auto repeatedInt64YsonStr = repeatedInt64Yson.ToString();
-
- auto anotherRepeatedInt64YsonStr = BuildYsonStringFluently()
- .BeginList()
- .EndList()
- .ToString();
-
- auto repeatedOptionalAnyYson = BuildYsonStringFluently()
- .BeginList()
- .Item().Value(1)
- .Item().Value("abc")
- .Item().Entity()
- .Item().Value(true)
- .EndList();
- auto repeatedOptionalAnyYsonStr = repeatedOptionalAnyYson.ToString();
-
- auto otherComplexFieldYson = BuildYsonStringFluently()
- .BeginList()
- .Item().Value(22)
- .Item().Value(23)
- .Item().Value(24)
- .EndList();
- auto otherComplexFieldYsonStr = otherComplexFieldYson.ToString();
-
- TUnversionedRowBuilder builder;
- builder.AddValue(MakeUnversionedCompositeValue(firstYsonStr, firstId));
- builder.AddValue(MakeUnversionedCompositeValue(secondYsonStr, secondId));
- builder.AddValue(MakeUnversionedCompositeValue(repeatedMessageYsonStr, repeatedMessageId));
- builder.AddValue(MakeUnversionedCompositeValue(repeatedInt64YsonStr, repeatedInt64Id));
- builder.AddValue(MakeUnversionedCompositeValue(anotherRepeatedInt64YsonStr, anotherRepeatedInt64Id));
- builder.AddValue(MakeUnversionedInt64Value(4321, anyFieldId));
-
- builder.AddValue(MakeUnversionedInt64Value(-64, int64FieldId));
- builder.AddValue(MakeUnversionedUint64Value(64, uint64FieldId));
- builder.AddValue(MakeUnversionedInt64Value(-32, int32FieldId));
- builder.AddValue(MakeUnversionedUint64Value(32, uint32FieldId));
-
- builder.AddValue(MakeUnversionedInt64Value(-42, enumIntFieldId));
- builder.AddValue(MakeUnversionedStringValue("Three", enumStringStringFieldId));
- builder.AddValue(MakeUnversionedInt64Value(1, enumStringInt64FieldId));
-
- const auto HelloWorldInRussian = "\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82, \xd0\xbc\xd0\xb8\xd1\x80!";
- builder.AddValue(MakeUnversionedStringValue(HelloWorldInRussian, utf8FieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue(repeatedOptionalAnyYsonStr, repeatedOptionalAnyFieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue(otherComplexFieldYsonStr, otherComplexFieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue("[12;-10;123456789000;]", packedRepeatedInt64FieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue("[1;2;3]", optionalRepeatedInt64FieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue("[0; foobaz]", oneofFieldId));
- builder.AddValue(MakeUnversionedNullValue(optionalOneofFieldId));
-
- builder.AddValue(MakeUnversionedCompositeValue("[[2; [x; y]]; [5; [z; w]]]", mapFieldId));
-
- auto rows = std::vector<TUnversionedRow>(rowCount, builder.GetRow());
- writer->Write(rows);
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput input(result);
- TLenvalParser lenvalParser(&input);
-
- for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
- auto entry = lenvalParser.Next();
- ASSERT_TRUE(entry);
-
- NYT::TMessageWithStructuredEmbedded message;
- ASSERT_TRUE(message.ParseFromString(entry->RowData));
-
- const auto& first = message.first();
- EXPECT_EQ(first.enum_field(), EEnum::Two);
- EXPECT_EQ(first.int64_field(), 44);
- std::vector<i64> firstRepeatedInt64Field(
- first.repeated_int64_field().begin(),
- first.repeated_int64_field().end());
- EXPECT_EQ(firstRepeatedInt64Field, (std::vector<i64>{55, 56, 57}));
- std::vector<i64> firstAnotherRepeatedInt64Field(
- first.another_repeated_int64_field().begin(),
- first.another_repeated_int64_field().end());
- EXPECT_EQ(firstAnotherRepeatedInt64Field, (std::vector<i64>{}));
- EXPECT_EQ(first.message_field().key(), "key");
- EXPECT_EQ(first.message_field().value(), "value");
- ASSERT_EQ(first.repeated_message_field_size(), 2);
- EXPECT_EQ(first.repeated_message_field(0).key(), "key1");
- EXPECT_EQ(first.repeated_message_field(0).value(), "value1");
- EXPECT_EQ(first.repeated_message_field(1).key(), "key2");
- EXPECT_EQ(first.repeated_message_field(1).value(), "value2");
-
- EXPECT_NODES_EQUAL(
- ConvertToNode(TYsonString(first.any_int64_field())),
- BuildYsonNodeFluently().Value(45));
-
- EXPECT_NODES_EQUAL(
- ConvertToNode(TYsonString(first.any_map_field())),
- BuildYsonNodeFluently().BeginMap()
- .Item("key").Value("value")
- .EndMap());
-
- std::vector<TYsonString> firstRepeatedOptionalAnyField(
- first.repeated_optional_any_field().begin(),
- first.repeated_optional_any_field().end());
-
- EXPECT_NODES_EQUAL(
- ConvertToNode(firstRepeatedOptionalAnyField),
- BuildYsonNodeFluently()
- .BeginList()
- .Item().Value(2)
- .Item().Entity()
- .Item().Value("foo")
- .EndList());
-
- EXPECT_FALSE(first.has_optional_int64_field());
-
- std::vector<EEnum> actualFirstPackedRepeatedEnumField;
- for (auto x : first.packed_repeated_enum_field()) {
- actualFirstPackedRepeatedEnumField.push_back(static_cast<EEnum>(x));
- }
- auto expectedFirstPackedRepeatedEnumField = std::vector<EEnum>{EEnum::MinusFortyTwo, EEnum::Two};
- EXPECT_EQ(expectedFirstPackedRepeatedEnumField, actualFirstPackedRepeatedEnumField);
-
- std::vector<bool> firstOptionalRepeatedBoolField(
- first.optional_repeated_bool_field().begin(),
- first.optional_repeated_bool_field().end());
- auto expectedFirstOptionalRepeatedBoolField = std::vector<bool>{false, true, false};
- EXPECT_EQ(expectedFirstOptionalRepeatedBoolField, firstOptionalRepeatedBoolField);
-
- EXPECT_FALSE(first.has_oneof_string_field_1());
- EXPECT_FALSE(first.has_oneof_string_field());
- EXPECT_TRUE(first.has_oneof_message_field());
- EXPECT_EQ(first.oneof_message_field().key(), "foo");
- EXPECT_FALSE(first.oneof_message_field().has_value());
-
- EXPECT_FALSE(first.has_optional_oneof_string_field_1());
- EXPECT_FALSE(first.has_optional_oneof_string_field());
- EXPECT_FALSE(first.has_optional_oneof_message_field());
-
- EXPECT_EQ(std::ssize(first.map_field()), 2);
- ASSERT_EQ(static_cast<int>(first.map_field().count(13)), 1);
- EXPECT_EQ(first.map_field().at(13).key(), "bac");
- EXPECT_EQ(first.map_field().at(13).value(), "cab");
- ASSERT_EQ(static_cast<int>(first.map_field().count(15)), 1);
- EXPECT_EQ(first.map_field().at(15).key(), "ya");
- EXPECT_EQ(first.map_field().at(15).value(), "make");
-
- const auto& second = message.second();
- EXPECT_EQ(second.one(), 101);
- EXPECT_EQ(second.two(), 102);
- EXPECT_EQ(second.three(), 103);
-
- ASSERT_EQ(message.repeated_message_field_size(), 2);
- EXPECT_EQ(message.repeated_message_field(0).key(), "key11");
- EXPECT_EQ(message.repeated_message_field(0).value(), "value11");
- EXPECT_EQ(message.repeated_message_field(1).key(), "key21");
- EXPECT_EQ(message.repeated_message_field(1).value(), "value21");
-
- std::vector<i64> repeatedInt64Field(
- message.repeated_int64_field().begin(),
- message.repeated_int64_field().end());
- EXPECT_EQ(repeatedInt64Field, (std::vector<i64>{31, 32, 33}));
-
- std::vector<i64> anotherRepeatedInt64Field(
- message.another_repeated_int64_field().begin(),
- message.another_repeated_int64_field().end());
- EXPECT_EQ(anotherRepeatedInt64Field, (std::vector<i64>{}));
-
- EXPECT_EQ(message.int64_any_field(), 4321);
-
- // Note the reversal of 32 <-> 64.
- EXPECT_EQ(message.int32_field(), -64);
- EXPECT_EQ(message.uint32_field(), 64u);
- EXPECT_EQ(message.int64_field(), -32);
- EXPECT_EQ(message.uint64_field(), 32u);
-
- EXPECT_EQ(message.enum_int_field(), EEnum::MinusFortyTwo);
- EXPECT_EQ(message.enum_string_string_field(), EEnum::Three);
- EXPECT_EQ(message.enum_string_int64_field(), EEnum::One);
-
- EXPECT_EQ(message.utf8_field(), HelloWorldInRussian);
-
- std::vector<TYsonString> repeatedOptionalAnyField(
- message.repeated_optional_any_field().begin(),
- message.repeated_optional_any_field().end());
- EXPECT_NODES_EQUAL(ConvertToNode(repeatedOptionalAnyField), ConvertToNode(repeatedOptionalAnyYson));
-
- {
- auto otherColumns = ConvertToNode(TYsonString(message.other_columns_field()))->AsMap();
- auto mode = complexTypeMode;
- auto expected = ([&] {
- switch (mode) {
- case EComplexTypeMode::Named:
- return BuildYsonNodeFluently()
- .BeginMap()
- .Item("one").Value(22)
- .Item("two").Value(23)
- .Item("three").Value(24)
- .EndMap();
- case EComplexTypeMode::Positional:
- return ConvertToNode(otherComplexFieldYson);
- }
- YT_ABORT();
- })();
-
- EXPECT_NODES_EQUAL(expected, otherColumns->GetChildOrThrow("other_complex_field"));
- }
-
- std::vector<i64> actualPackedRepeatedInt64Field(
- message.packed_repeated_int64_field().begin(),
- message.packed_repeated_int64_field().end());
- auto expectedPackedRepeatedInt64Field = std::vector<i64>{12, -10, 123456789000LL};
- EXPECT_EQ(expectedPackedRepeatedInt64Field, actualPackedRepeatedInt64Field);
-
- std::vector<i64> actualOptionalRepeatedInt64Field(
- message.optional_repeated_int64_field().begin(),
- message.optional_repeated_int64_field().end());
- auto expectedOptionalRepeatedInt64Field = std::vector<i64>{1, 2, 3};
- EXPECT_EQ(expectedOptionalRepeatedInt64Field, actualOptionalRepeatedInt64Field);
-
- EXPECT_TRUE(message.has_oneof_string_field_1());
- EXPECT_EQ(message.oneof_string_field_1(), "foobaz");
- EXPECT_FALSE(message.has_oneof_string_field());
- EXPECT_FALSE(message.has_oneof_message_field());
-
- EXPECT_FALSE(message.has_optional_oneof_string_field_1());
- EXPECT_FALSE(message.has_optional_oneof_string_field());
- EXPECT_FALSE(message.has_optional_oneof_message_field());
-
- EXPECT_EQ(std::ssize(message.map_field()), 2);
- ASSERT_EQ(static_cast<int>(message.map_field().count(2)), 1);
- EXPECT_EQ(message.map_field().at(2).key(), "x");
- EXPECT_EQ(message.map_field().at(2).value(), "y");
- ASSERT_EQ(static_cast<int>(message.map_field().count(5)), 1);
- EXPECT_EQ(message.map_field().at(5).key(), "z");
- EXPECT_EQ(message.map_field().at(5).value(), "w");
- }
-
- ASSERT_FALSE(lenvalParser.Next());
-}
-
-// Returns a re-encoded copy of |node| with its entries ordered by ascending key.
-//
-// |node| must be convertible to a sequence of (i64 key, value node) pairs.
-// Only the key takes part in the comparison: the default std::pair ordering
-// would fall through to comparing the value node pointers whenever two keys
-// tie, so the result would depend on pointer values rather than on the data.
-// A stable sort keeps entries with equal keys in their incoming order instead.
-INodePtr SortMapByKey(const INodePtr& node)
-{
-    auto keyValuePairs = ConvertTo<std::vector<std::pair<i64, INodePtr>>>(node);
-    std::stable_sort(
-        keyValuePairs.begin(),
-        keyValuePairs.end(),
-        [] (const auto& lhs, const auto& rhs) {
-            return lhs.first < rhs.first;
-        });
-    return ConvertTo<INodePtr>(keyValuePairs);
-}
-
-// Parses a TEmbeddingMessage whose nested messages t1 and t1.t2 are populated
-// and checks the columns produced by ParseRows for every parsed row.
-TEST_P(TProtobufFormatStructuredMessage, EmbeddedParse)
-{
-    auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
-
-    auto schema = BuildEmbeddedSchema();
-    auto config = BuildEmbeddedConfig(complexTypeMode, protoFormatType);
-
-    NYT::TEmbeddingMessage message;
-    message.set_num(789);
-
-    // First nesting level.
-    auto* outer = message.mutable_t1();
-    outer->set_embedded_num(123);
-    outer->set_uint_variant(555);
-
-    // Second nesting level.
-    auto* inner = outer->mutable_t2();
-    inner->set_embedded2_num(456);
-    for (const char* item : {"a", "b", "c"}) {
-        inner->add_embedded2_repeated(item);
-    }
-
-    auto* innerStruct = inner->mutable_embedded2_struct();
-    innerStruct->set_float1(1.5f);
-    innerStruct->set_string1("abc");
-
-    //message.set_extra_field("*");
-    //outer->set_embedded_extra_field("*");
-
-    auto rowCollector = ParseRows(message, config, schema, rowCount);
-    for (int row = 0; row < rowCount; ++row) {
-        // Scalar columns taken from all three nesting levels.
-        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(row, "num")), 789u);
-        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(row, "embedded_num")), 123u);
-        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(row, "embedded2_num")), 456u);
-
-        // The uint_variant value is expected in the "variant" column as [1; 555u].
-        auto variantNode = GetComposite(rowCollector.GetRowValue(row, "variant"));
-        EXPECT_NODES_EQUAL(
-            variantNode,
-            ConvertToNode(TYsonString(TStringBuf("[1; 555u]"))));
-
-        // Repeated string field: a list of three strings.
-        auto repeatedNode = GetComposite(rowCollector.GetRowValue(row, "embedded2_repeated"));
-        ASSERT_EQ(repeatedNode->GetType(), ENodeType::List);
-        auto repeatedList = repeatedNode->AsList();
-        ASSERT_EQ(repeatedList->GetChildCount(), 3);
-        EXPECT_EQ(repeatedList->GetChildValueOrThrow<TString>(0), "a");
-        EXPECT_EQ(repeatedList->GetChildValueOrThrow<TString>(1), "b");
-        EXPECT_EQ(repeatedList->GetChildValueOrThrow<TString>(2), "c");
-
-        // Nested struct: a two-element list [float1; string1].
-        auto structNode = GetComposite(rowCollector.GetRowValue(row, "embedded2_struct"));
-        ASSERT_EQ(structNode->GetType(), ENodeType::List);
-        auto structList = structNode->AsList();
-        ASSERT_EQ(structList->GetChildCount(), 2);
-        EXPECT_EQ(structList->GetChildValueOrThrow<double>(0), 1.5f);
-        EXPECT_EQ(structList->GetChildValueOrThrow<TString>(1), "abc");
-    }
-}
-
-// Populates a TMessageWithStructuredEmbedded, runs it through ParseRows and
-// verifies every resulting column for each parsed row.
-TEST_P(TProtobufFormatStructuredMessage, Parse)
-{
-    auto [complexTypeMode, rowCount, protoFormatType] = GetParam();
-
-    auto schema = CreateSchemaWithStructuredMessage();
-    auto config = CreateConfigWithStructuredMessage(complexTypeMode, protoFormatType);
-
-    NYT::TMessageWithStructuredEmbedded message;
-
-    // ---- Field "first": the embedded structured message. ----
-    auto* embedded = message.mutable_first();
-    embedded->set_enum_field(EEnum::Two);
-    embedded->set_int64_field(44);
-
-    for (auto item : {55, 56, 57}) {
-        embedded->add_repeated_int64_field(item);
-    }
-
-    // another_repeated_int64_field is intentionally empty.
-
-    auto* embeddedMessageField = embedded->mutable_message_field();
-    embeddedMessageField->set_key("key");
-    embeddedMessageField->set_value("value");
-
-    auto* embeddedPair1 = embedded->add_repeated_message_field();
-    embeddedPair1->set_key("key1");
-    embeddedPair1->set_value("value1");
-    auto* embeddedPair2 = embedded->add_repeated_message_field();
-    embeddedPair2->set_key("key2");
-    embeddedPair2->set_value("value2");
-
-    embedded->set_any_int64_field(BuildYsonStringFluently().Value(4422).ToString());
-    auto anyMapYson = BuildYsonStringFluently()
-        .BeginMap()
-            .Item("key").Value("value")
-        .EndMap();
-    embedded->set_any_map_field(anyMapYson.ToString());
-
-    for (const char* item : {"%false", "42", "#"}) {
-        embedded->add_repeated_optional_any_field(item);
-    }
-
-    embedded->add_packed_repeated_enum_field(EEnum::MaxInt32);
-    embedded->add_packed_repeated_enum_field(EEnum::MinusFortyTwo);
-
-    // optional_repeated_bool_field is intentionally empty.
-
-    embedded->mutable_oneof_message_field()->set_key("KEY");
-
-    // optional_oneof_field is intentionally empty.
-
-    auto& embeddedMap = *embedded->mutable_map_field();
-    embeddedMap[111].set_key("key111");
-    embeddedMap[111].set_value("value111");
-    embeddedMap[222].set_key("key222");
-    embeddedMap[222].set_value("value222");
-
-    // ---- Field "second". ----
-    auto* secondProto = message.mutable_second();
-    secondProto->set_one(101);
-    secondProto->set_two(102);
-    secondProto->set_three(103);
-
-    // ---- Top-level fields. ----
-    for (auto item : {31, 32, 33}) {
-        message.add_repeated_int64_field(item);
-    }
-
-    // another_repeated_int64_field is intentionally empty.
-
-    auto* topPair1 = message.add_repeated_message_field();
-    topPair1->set_key("key11");
-    topPair1->set_value("value11");
-    auto* topPair2 = message.add_repeated_message_field();
-    topPair2->set_key("key21");
-    topPair2->set_value("value21");
-
-    message.set_int64_any_field(4321);
-
-    // Note the reversal of 32 <-> 64.
-    message.set_int64_field(-32);
-    message.set_uint64_field(32);
-    message.set_int32_field(-64);
-    message.set_uint32_field(64);
-
-    // Note that we don't set the "enum_string_int64_field" as it would fail during parsing.
-    message.set_enum_int_field(EEnum::MinusFortyTwo);
-    message.set_enum_string_string_field(EEnum::Three);
-
-    const auto HelloWorldInChinese = "\xe4\xbd\xa0\xe5\xa5\xbd\xef\xbc\x8c\xe4\xb8\x96\xe7\x95\x8c";
-    message.set_utf8_field(HelloWorldInChinese);
-
-    for (const char* item : {"#", "1", "\"qwe\"", "%true"}) {
-        message.add_repeated_optional_any_field(item);
-    }
-
-    // The "other columns" blob carries other_complex_field encoded according
-    // to the complex type mode; the parsed column is compared against the
-    // positional form further below.
-    auto positionalOtherComplex = BuildYsonNodeFluently()
-        .BeginList()
-            .Item().Value(301)
-            .Item().Value(302)
-            .Item().Value(303)
-        .EndList();
-
-    // Local copy so that the lambda captures a plain variable rather than the
-    // structured binding.
-    auto typeMode = complexTypeMode;
-    auto encodedOtherComplex = [&] {
-        if (typeMode == EComplexTypeMode::Named) {
-            return BuildYsonNodeFluently()
-                .BeginMap()
-                    .Item("one").Value(301)
-                    .Item("two").Value(302)
-                    .Item("three").Value(303)
-                .EndMap();
-        }
-        if (typeMode == EComplexTypeMode::Positional) {
-            return positionalOtherComplex;
-        }
-        YT_ABORT();
-    }();
-    auto otherColumns = BuildYsonStringFluently()
-        .BeginMap()
-            .Item("other_complex_field").Value(encodedOtherComplex)
-        .EndMap();
-    message.set_other_columns_field(otherColumns.ToString());
-
-    message.add_packed_repeated_int64_field(-123456789000LL);
-    message.add_packed_repeated_int64_field(0);
-
-    message.add_optional_repeated_int64_field(-4242);
-
-    // optional_oneof_field is intentionally empty.
-
-    message.set_oneof_string_field("spam");
-
-    auto& topLevelMap = *message.mutable_map_field();
-    topLevelMap[777].set_key("key777");
-    topLevelMap[777].set_value("value777");
-    topLevelMap[888].set_key("key888");
-    topLevelMap[888].set_value("value888");
-
-    // ---- Parse and verify. ----
-    auto rowCollector = ParseRows(message, config, schema, rowCount);
-    for (int row = 0; row < rowCount; ++row) {
-        auto embeddedNode = GetComposite(rowCollector.GetRowValue(row, "first"));
-        ASSERT_EQ(embeddedNode->GetType(), ENodeType::List);
-        auto embeddedList = embeddedNode->AsList();
-        ASSERT_EQ(embeddedList->GetChildCount(), 17);
-
-        // Shorthand for the positional members of the "first" struct.
-        auto childAt = [&] (int index) {
-            return embeddedList->GetChildOrThrow(index);
-        };
-
-        // [0] is absent from the message and therefore parsed as entity.
-        EXPECT_EQ(childAt(0)->GetType(), ENodeType::Entity);
-        // [1] enum_field, [2] int64_field.
-        EXPECT_EQ(embeddedList->GetChildValueOrThrow<TString>(1), "Two");
-        EXPECT_EQ(embeddedList->GetChildValueOrThrow<i64>(2), 44);
-
-        // [3] repeated_int64_field.
-        auto embeddedRepeatedInt64 = childAt(3);
-        ASSERT_EQ(embeddedRepeatedInt64->GetType(), ENodeType::List);
-        EXPECT_EQ(ConvertTo<std::vector<i64>>(embeddedRepeatedInt64), (std::vector<i64>{55, 56, 57}));
-
-        // [4] another_repeated_int64_field.
-        auto embeddedAnotherRepeatedInt64 = childAt(4);
-        ASSERT_EQ(embeddedAnotherRepeatedInt64->GetType(), ENodeType::List);
-        EXPECT_EQ(ConvertTo<std::vector<i64>>(embeddedAnotherRepeatedInt64), (std::vector<i64>{}));
-
-        // [5] message_field.
-        auto messageFieldNode = childAt(5);
-        ASSERT_EQ(messageFieldNode->GetType(), ENodeType::List);
-        EXPECT_EQ(messageFieldNode->AsList()->GetChildValueOrThrow<TString>(0), "key");
-        EXPECT_EQ(messageFieldNode->AsList()->GetChildValueOrThrow<TString>(1), "value");
-
-        // [6] repeated_message_field.
-        auto embeddedRepeatedMessage = childAt(6);
-        ASSERT_EQ(embeddedRepeatedMessage->GetType(), ENodeType::List);
-        ASSERT_EQ(embeddedRepeatedMessage->AsList()->GetChildCount(), 2);
-
-        auto embeddedPairNode1 = embeddedRepeatedMessage->AsList()->GetChildOrThrow(0);
-        ASSERT_EQ(embeddedPairNode1->GetType(), ENodeType::List);
-        ASSERT_EQ(embeddedPairNode1->AsList()->GetChildCount(), 2);
-        EXPECT_EQ(embeddedPairNode1->AsList()->GetChildValueOrThrow<TString>(0), "key1");
-        EXPECT_EQ(embeddedPairNode1->AsList()->GetChildValueOrThrow<TString>(1), "value1");
-
-        auto embeddedPairNode2 = embeddedRepeatedMessage->AsList()->GetChildOrThrow(1);
-        ASSERT_EQ(embeddedPairNode2->GetType(), ENodeType::List);
-        ASSERT_EQ(embeddedPairNode2->AsList()->GetChildCount(), 2);
-        EXPECT_EQ(embeddedPairNode2->AsList()->GetChildValueOrThrow<TString>(0), "key2");
-        EXPECT_EQ(embeddedPairNode2->AsList()->GetChildValueOrThrow<TString>(1), "value2");
-
-        // [7] any_int64_field.
-        ASSERT_EQ(childAt(7)->GetType(), ENodeType::Int64);
-        EXPECT_EQ(embeddedList->GetChildValueOrThrow<i64>(7), 4422);
-
-        // [8] any_map_field.
-        auto anyMapNode = childAt(8);
-        ASSERT_EQ(anyMapNode->GetType(), ENodeType::Map);
-        EXPECT_NODES_EQUAL(
-            anyMapNode,
-            BuildYsonNodeFluently()
-                .BeginMap()
-                    .Item("key").Value("value")
-                .EndMap());
-
-        // [9] optional_int64_field was not set.
-        ASSERT_EQ(childAt(9)->GetType(), ENodeType::Entity);
-
-        // [10] repeated_optional_any_field.
-        EXPECT_NODES_EQUAL(
-            childAt(10),
-            BuildYsonNodeFluently()
-                .BeginList()
-                    .Item().Value(false)
-                    .Item().Value(42)
-                    .Item().Entity()
-                .EndList());
-
-        // [11] packed_repeated_enum_field.
-        EXPECT_NODES_EQUAL(
-            childAt(11),
-            BuildYsonNodeFluently()
-                .BeginList()
-                    .Item().Value("MaxInt32")
-                    .Item().Value("MinusFortyTwo")
-                .EndList());
-
-        // optional_repeated_bool_field.
-        ASSERT_EQ(childAt(12)->GetType(), ENodeType::Entity);
-
-        // oneof_field.
-        EXPECT_NODES_EQUAL(
-            childAt(13),
-            BuildYsonNodeFluently()
-                .BeginList()
-                    .Item().Value(2)
-                    .Item().BeginList()
-                        .Item().Value("KEY")
-                        .Item().Entity()
-                    .EndList()
-                .EndList());
-
-        // optional_oneof_field.
-        ASSERT_EQ(childAt(14)->GetType(), ENodeType::Entity);
-
-        // map_field.
-        EXPECT_NODES_EQUAL(
-            SortMapByKey(childAt(15)),
-            BuildYsonNodeFluently()
-                .BeginList()
-                    .Item().BeginList()
-                        .Item().Value(111)
-                        .Item().BeginList()
-                            .Item().Value("key111")
-                            .Item().Value("value111")
-                        .EndList()
-                    .EndList()
-                    .Item().BeginList()
-                        .Item().Value(222)
-                        .Item().BeginList()
-                            .Item().Value("key222")
-                            .Item().Value("value222")
-                        .EndList()
-                    .EndList()
-                .EndList());
-
-        // field_missing_from_proto2.
-        ASSERT_EQ(childAt(16)->GetType(), ENodeType::Entity);
-
-        // ---- Column "second". ----
-        auto secondNode = GetComposite(rowCollector.GetRowValue(row, "second"));
-        ASSERT_EQ(secondNode->GetType(), ENodeType::List);
-        EXPECT_EQ(ConvertTo<std::vector<i64>>(secondNode), (std::vector<i64>{101, 102, 103}));
-
-        // ---- Top-level columns. ----
-        auto repeatedMessageNode = GetComposite(rowCollector.GetRowValue(row, "repeated_message_field"));
-        ASSERT_EQ(repeatedMessageNode->GetType(), ENodeType::List);
-        ASSERT_EQ(repeatedMessageNode->AsList()->GetChildCount(), 2);
-
-        auto topPairNode1 = repeatedMessageNode->AsList()->GetChildOrThrow(0);
-        ASSERT_EQ(topPairNode1->GetType(), ENodeType::List);
-        ASSERT_EQ(topPairNode1->AsList()->GetChildCount(), 2);
-        EXPECT_EQ(topPairNode1->AsList()->GetChildValueOrThrow<TString>(0), "key11");
-        EXPECT_EQ(topPairNode1->AsList()->GetChildValueOrThrow<TString>(1), "value11");
-
-        auto topPairNode2 = repeatedMessageNode->AsList()->GetChildOrThrow(1);
-        ASSERT_EQ(topPairNode2->GetType(), ENodeType::List);
-        ASSERT_EQ(topPairNode2->AsList()->GetChildCount(), 2);
-        EXPECT_EQ(topPairNode2->AsList()->GetChildValueOrThrow<TString>(0), "key21");
-        EXPECT_EQ(topPairNode2->AsList()->GetChildValueOrThrow<TString>(1), "value21");
-
-        auto repeatedInt64Node = GetComposite(rowCollector.GetRowValue(row, "repeated_int64_field"));
-        EXPECT_EQ(ConvertTo<std::vector<i64>>(repeatedInt64Node), (std::vector<i64>{31, 32, 33}));
-
-        auto anotherRepeatedInt64Node = GetComposite(rowCollector.GetRowValue(row, "another_repeated_int64_field"));
-        EXPECT_EQ(ConvertTo<std::vector<i64>>(anotherRepeatedInt64Node), (std::vector<i64>{}));
-
-        auto anyValue = rowCollector.GetRowValue(row, "any_field");
-        ASSERT_EQ(anyValue.Type, EValueType::Int64);
-        EXPECT_EQ(anyValue.Data.Int64, 4321);
-
-        // The expected values mirror the 32 <-> 64 reversal used when filling the message.
-        EXPECT_EQ(GetInt64(rowCollector.GetRowValue(row, "int64_field")), -64);
-        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(row, "uint64_field")), 64u);
-        EXPECT_EQ(GetInt64(rowCollector.GetRowValue(row, "int32_field")), -32);
-        EXPECT_EQ(GetUint64(rowCollector.GetRowValue(row, "uint32_field")), 32u);
-
-        EXPECT_EQ(GetInt64(rowCollector.GetRowValue(row, "enum_int_field")), -42);
-        EXPECT_EQ(GetString(rowCollector.GetRowValue(row, "enum_string_string_field")), "Three");
-
-        EXPECT_EQ(GetString(rowCollector.GetRowValue(row, "utf8_field")), HelloWorldInChinese);
-
-        auto actualRepeatedOptionalAny = GetComposite(rowCollector.GetRowValue(row, "repeated_optional_any_field"));
-        auto expectedRepeatedOptionalAny = BuildYsonNodeFluently()
-            .BeginList()
-                .Item().Entity()
-                .Item().Value(1)
-                .Item().Value("qwe")
-                .Item().Value(true)
-            .EndList();
-        EXPECT_NODES_EQUAL(actualRepeatedOptionalAny, expectedRepeatedOptionalAny);
-
-        auto actualOtherComplex = GetComposite(rowCollector.GetRowValue(row, "other_complex_field"));
-        EXPECT_NODES_EQUAL(actualOtherComplex, positionalOtherComplex);
-
-        EXPECT_NODES_EQUAL(
-            GetComposite(rowCollector.GetRowValue(row, "packed_repeated_int64_field")),
-            ConvertToNode(TYsonString(TStringBuf("[-123456789000;0]"))));
-
-        EXPECT_NODES_EQUAL(
-            GetComposite(rowCollector.GetRowValue(row, "optional_repeated_int64_field")),
-            ConvertToNode(TYsonString(TStringBuf("[-4242]"))));
-
-        EXPECT_NODES_EQUAL(
-            GetComposite(rowCollector.GetRowValue(row, "oneof_field")),
-            ConvertToNode(TYsonString(TStringBuf("[1; \"spam\"]"))));
-
-        EXPECT_FALSE(rowCollector.FindRowValue(row, "optional_oneof_field"));
-
-        // map_field.
-        EXPECT_NODES_EQUAL(
-            SortMapByKey(GetComposite(rowCollector.GetRowValue(row, "map_field"))),
-            ConvertToNode(TYsonString(TStringBuf("[[777; [key777; value777]]; [888; [key888; value888]]]"))));
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Builds the schemas of the three tables used by the several-tables tests:
-// one with a struct, a list and an "any" column, one with two scalar columns,
-// and one without any columns.
-std::vector<TTableSchemaPtr> CreateSeveralTablesSchemas()
-{
-    auto stringType = SimpleLogicalType(ESimpleLogicalValueType::String);
-    auto int64Type = SimpleLogicalType(ESimpleLogicalValueType::Int64);
-
-    auto firstSchema = New<TTableSchema>(std::vector<TColumnSchema>{
-        {"embedded", StructLogicalType({
-            {"enum_field", stringType},
-            {"int64_field", int64Type},
-        })},
-        {"repeated_int64_field", ListLogicalType(int64Type)},
-        {"any_field", SimpleLogicalType(ESimpleLogicalValueType::Any)},
-    });
-
-    auto secondSchema = New<TTableSchema>(std::vector<TColumnSchema>{
-        {"enum_field", stringType},
-        {"int64_field", int64Type},
-    });
-
-    // Empty schema.
-    auto emptySchema = New<TTableSchema>();
-
-    return {firstSchema, secondSchema, emptySchema};
-}
-
-// Builds the protobuf format config node for the three-table tests.
-//
-// For EProtoFormatType::FileDescriptor the config is produced by
-// CreateFileDescriptorConfig from the three message types. Otherwise the
-// format type must be Structured, and the tables are described column by
-// column as attributes of a "protobuf" string node.
-INodePtr CreateSeveralTablesConfig(EProtoFormatType protoFormatType)
-{
-    if (protoFormatType == EProtoFormatType::FileDescriptor) {
-        return CreateFileDescriptorConfig<TSeveralTablesMessageFirst, TSeveralTablesMessageSecond, TSeveralTablesMessageThird>();
-    }
-    YT_VERIFY(protoFormatType == EProtoFormatType::Structured);
-
-    // These two descriptors appear both inside the "embedded" structured
-    // message of table #1 and as plain columns of table #2.
-    auto int64FieldConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("name").Value("int64_field")
-            .Item("field_number").Value(2)
-            .Item("proto_type").Value("int64")
-        .EndMap();
-    auto enumFieldConfig = BuildYsonNodeFluently()
-        .BeginMap()
-            .Item("name").Value("enum_field")
-            .Item("field_number").Value(1)
-            .Item("proto_type").Value("enum_string")
-            .Item("enumeration_name").Value("EEnum")
-        .EndMap();
-
-    return BuildYsonNodeFluently()
-        .BeginAttributes()
-            .Item("enumerations").Value(EnumerationsConfig)
-            .Item("tables")
-            .BeginList()
-                // Table #1.
-                .Item()
-                .BeginMap()
-                    .Item("columns")
-                    .BeginList()
-                        .Item()
-                        .BeginMap()
-                            .Item("name").Value("embedded")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("structured_message")
-                            .Item("fields")
-                            .BeginList()
-                                .Item().Value(int64FieldConfig)
-                                .Item().Value(enumFieldConfig)
-                            .EndList()
-                        .EndMap()
-                        .Item()
-                        .BeginMap()
-                            .Item("name").Value("repeated_int64_field")
-                            .Item("field_number").Value(2)
-                            .Item("proto_type").Value("int64")
-                            .Item("repeated").Value(true)
-                        .EndMap()
-                        .Item()
-                        .BeginMap()
-                            // In schema it is of type "any".
-                            .Item("name").Value("any_field")
-                            .Item("field_number").Value(3)
-                            .Item("proto_type").Value("int64")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-
-                // Table #2.
-                .Item()
-                .BeginMap()
-                    .Item("columns")
-                    .BeginList()
-                        .Item().Value(int64FieldConfig)
-                        .Item().Value(enumFieldConfig)
-                    .EndList()
-                .EndMap()
-
-                // Table #3.
-                .Item()
-                .BeginMap()
-                    .Item("columns")
-                    .BeginList()
-                        .Item()
-                        .BeginMap()
-                            .Item("name").Value("string_field")
-                            .Item("field_number").Value(1)
-                            .Item("proto_type").Value("string")
-                        .EndMap()
-                    .EndList()
-                .EndMap()
-            .EndList()
-        .EndAttributes()
-        .Value("protobuf");
-}
-
-// Parameter of the several-tables tests: the protobuf config flavour only.
-using TProtobufFormatSeveralTablesParam = std::tuple<EProtoFormatType>;
-
-// Value-parameterized fixture; it carries no state of its own.
-class TProtobufFormatSeveralTables
-    : public ::testing::TestWithParam<TProtobufFormatSeveralTablesParam>
-{ };
-
-// One instantiation per config flavour.
-INSTANTIATE_TEST_SUITE_P(
-    FileDescriptor,
-    TProtobufFormatSeveralTables,
-    ::testing::Values(std::make_tuple(EProtoFormatType::FileDescriptor)));
-
-INSTANTIATE_TEST_SUITE_P(
-    Structured,
-    TProtobufFormatSeveralTables,
-    ::testing::Values(std::make_tuple(EProtoFormatType::Structured)));
-
-// Writes one row into each of the three tables through a single protobuf
-// writer, then reads the lenval output back and checks the three messages
-// and the end-of-stream state of the parser.
-TEST_P(TProtobufFormatSeveralTables, Write)
-{
-    auto [protoFormatType] = GetParam();
-
-    auto schemas = CreateSeveralTablesSchemas();
-    auto configNode = CreateSeveralTablesConfig(protoFormatType);
-
-    auto config = ConvertTo<TProtobufFormatConfigPtr>(configNode->Attributes().ToMap());
-
-    auto nameTable = New<TNameTable>();
-    auto embeddedColumn = nameTable->RegisterName("embedded");
-    auto anyColumn = nameTable->RegisterName("any_field");
-    auto int64Column = nameTable->RegisterName("int64_field");
-    auto repeatedInt64Column = nameTable->RegisterName("repeated_int64_field");
-    auto enumColumn = nameTable->RegisterName("enum_field");
-    auto stringColumn = nameTable->RegisterName("string_field");
-    auto tableIndexColumn = nameTable->RegisterName(TableIndexColumnName);
-
-    TString result;
-    TStringOutput resultStream(result);
-
-    // Both the table index and the end-of-stream control attributes are enabled.
-    auto controlAttributesConfig = New<TControlAttributesConfig>();
-    controlAttributesConfig->EnableTableIndex = true;
-    controlAttributesConfig->EnableEndOfStream = true;
-
-    auto writer = CreateWriterForProtobuf(
-        std::move(config),
-        schemas,
-        nameTable,
-        CreateAsyncAdapter(&resultStream),
-        true,
-        std::move(controlAttributesConfig),
-        0);
-
-    auto embeddedYson = BuildYsonStringFluently()
-        .BeginList()
-            .Item().Value("Two")
-            .Item().Value(44)
-        .EndList()
-        .ToString();
-
-    auto repeatedInt64Yson = ConvertToYsonString(std::vector<i64>{31, 32, 33}).ToString();
-
-    // Row for table #1; it carries no explicit table index value.
-    TUnversionedRowBuilder firstRowBuilder;
-    firstRowBuilder.AddValue(MakeUnversionedCompositeValue(embeddedYson, embeddedColumn));
-    firstRowBuilder.AddValue(MakeUnversionedCompositeValue(repeatedInt64Yson, repeatedInt64Column));
-    firstRowBuilder.AddValue(MakeUnversionedInt64Value(4321, anyColumn));
-    writer->Write({firstRowBuilder.GetRow()});
-
-    // Row for table #2 (table index 1).
-    TUnversionedRowBuilder secondRowBuilder;
-    secondRowBuilder.AddValue(MakeUnversionedStringValue("Two", enumColumn));
-    secondRowBuilder.AddValue(MakeUnversionedInt64Value(999, int64Column));
-    secondRowBuilder.AddValue(MakeUnversionedInt64Value(1, tableIndexColumn));
-    writer->Write({secondRowBuilder.GetRow()});
-
-    // Row for table #3 (table index 2).
-    TUnversionedRowBuilder thirdRowBuilder;
-    thirdRowBuilder.AddValue(MakeUnversionedStringValue("blah", stringColumn));
-    thirdRowBuilder.AddValue(MakeUnversionedInt64Value(2, tableIndexColumn));
-    writer->Write({thirdRowBuilder.GetRow()});
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-    TStringInput input(result);
-    TLenvalParser lenvalParser(&input);
-
-    // First entry: TSeveralTablesMessageFirst.
-    auto firstEntry = lenvalParser.Next();
-    ASSERT_TRUE(firstEntry);
-
-    NYT::TSeveralTablesMessageFirst firstMessage;
-    ASSERT_TRUE(firstMessage.ParseFromString(firstEntry->RowData));
-
-    const auto& embedded = firstMessage.embedded();
-    EXPECT_EQ(embedded.enum_field(), EEnum::Two);
-    EXPECT_EQ(embedded.int64_field(), 44);
-
-    std::vector<i64> repeatedInt64Field(
-        firstMessage.repeated_int64_field().begin(),
-        firstMessage.repeated_int64_field().end());
-    EXPECT_EQ(repeatedInt64Field, (std::vector<i64>{31, 32, 33}));
-    EXPECT_EQ(firstMessage.int64_field(), 4321);
-
-    // Second entry: TSeveralTablesMessageSecond.
-    auto secondEntry = lenvalParser.Next();
-    ASSERT_TRUE(secondEntry);
-
-    NYT::TSeveralTablesMessageSecond secondMessage;
-    ASSERT_TRUE(secondMessage.ParseFromString(secondEntry->RowData));
-
-    EXPECT_EQ(secondMessage.enum_field(), EEnum::Two);
-    EXPECT_EQ(secondMessage.int64_field(), 999);
-
-    // Third entry: TSeveralTablesMessageThird.
-    auto thirdEntry = lenvalParser.Next();
-    ASSERT_TRUE(thirdEntry);
-
-    NYT::TSeveralTablesMessageThird thirdMessage;
-    ASSERT_TRUE(thirdMessage.ParseFromString(thirdEntry->RowData));
-
-    EXPECT_EQ(thirdMessage.string_field(), "blah");
-
-    // End of stream is reported only after the next Next() call, which yields nothing.
-    ASSERT_FALSE(lenvalParser.IsEndOfStream());
-    ASSERT_FALSE(lenvalParser.Next());
-    ASSERT_TRUE(lenvalParser.IsEndOfStream());
-    ASSERT_FALSE(lenvalParser.Next());
-}
-
-TEST_P(TProtobufFormatSeveralTables, Parse)
-{
- auto [protoFormatType] = GetParam();
-
- auto schemas = CreateSeveralTablesSchemas();
- auto configNode = CreateSeveralTablesConfig(protoFormatType);
- auto config = ConvertTo<TProtobufFormatConfigPtr>(configNode->Attributes().ToMap());
-
- std::vector<TCollectingValueConsumer> rowCollectors;
- std::vector<std::unique_ptr<IParser>> parsers;
- for (const auto& schema : schemas) {
- rowCollectors.emplace_back(schema);
- }
- for (int tableIndex = 0; tableIndex < static_cast<int>(schemas.size()); ++tableIndex) {
- parsers.push_back(CreateParserForProtobuf(
- &rowCollectors[tableIndex],
- config,
- tableIndex));
- }
-
- NYT::TSeveralTablesMessageFirst firstMessage;
- auto* embedded = firstMessage.mutable_embedded();
- embedded->set_enum_field(EEnum::Two);
- embedded->set_int64_field(44);
-
- firstMessage.add_repeated_int64_field(55);
- firstMessage.add_repeated_int64_field(56);
- firstMessage.add_repeated_int64_field(57);
-
- firstMessage.set_int64_field(4444);
-
- NYT::TSeveralTablesMessageSecond secondMessage;
- secondMessage.set_enum_field(EEnum::Two);
- secondMessage.set_int64_field(44);
-
- NYT::TSeveralTablesMessageThird thirdMessage;
- thirdMessage.set_string_field("blah");
-
- auto parse = [] (auto& parser, const auto& message) {
- TString lenvalBytes;
- {
- TStringOutput out(lenvalBytes);
- auto messageSize = static_cast<ui32>(message.ByteSizeLong());
- out.Write(&messageSize, sizeof(messageSize));
- ASSERT_TRUE(message.SerializeToArcadiaStream(&out));
- }
- parser->Read(lenvalBytes);
- parser->Finish();
- };
-
- parse(parsers[0], firstMessage);
- parse(parsers[1], secondMessage);
- parse(parsers[2], thirdMessage);
-
- {
- const auto& rowCollector = rowCollectors[0];
- ASSERT_EQ(static_cast<int>(rowCollector.Size()), 1);
-
- auto embeddedNode = GetComposite(rowCollector.GetRowValue(0, "embedded"));
- ASSERT_EQ(ConvertToTextYson(embeddedNode), "[\"Two\";44;]");
-
- auto repeatedInt64Node = GetComposite(rowCollector.GetRowValue(0, "repeated_int64_field"));
- ASSERT_EQ(ConvertToTextYson(repeatedInt64Node), "[55;56;57;]");
-
- auto int64Field = GetInt64(rowCollector.GetRowValue(0, "any_field"));
- EXPECT_EQ(int64Field, 4444);
- }
-
- {
- const auto& rowCollector = rowCollectors[1];
- ASSERT_EQ(static_cast<int>(rowCollector.Size()), 1);
-
- EXPECT_EQ(GetString(rowCollector.GetRowValue(0, "enum_field")), "Two");
- EXPECT_EQ(GetInt64(rowCollector.GetRowValue(0, "int64_field")), 44);
- }
-
- {
- const auto& rowCollector = rowCollectors[2];
- ASSERT_EQ(static_cast<int>(rowCollector.Size()), 1);
-
- EXPECT_EQ(GetString(rowCollector.GetRowValue(0, "string_field")), "blah");
- }
-}
-
-TEST(TProtobufFormat, SchemaConfigMismatch)
-{
- auto createParser = [] (const TTableSchemaPtr& schema, const INodePtr& configNode) {
- TCollectingValueConsumer rowCollector(schema);
- return CreateParserForProtobuf(
- &rowCollector,
- ConvertTo<TProtobufFormatConfigPtr>(configNode),
- 0);
- };
- auto createSeveralTableWriter = [] (const std::vector<TTableSchemaPtr>& schemas, const INodePtr& configNode) {
- TString result;
- TStringOutput resultStream(result);
- return CreateWriterForProtobuf(
- ConvertTo<TProtobufFormatConfigPtr>(configNode),
- schemas,
- New<TNameTable>(),
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
- };
- auto createWriter = [&] (const TTableSchemaPtr& schema, const INodePtr& configNode) {
- createSeveralTableWriter({schema}, configNode);
- };
-
- auto schema_struct_with_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"struct", StructLogicalType({
- {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- })},
- });
-
- auto schema_struct_with_uint64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"struct", StructLogicalType({
- {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))},
- })},
- });
-
- auto config_struct_with_int64 = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("struct")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("int64_field")
- .Item("field_number").Value(2)
- // Wrong type.
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- // OK.
- EXPECT_NO_THROW(createParser(schema_struct_with_int64, config_struct_with_int64));
- EXPECT_NO_THROW(createWriter(schema_struct_with_int64, config_struct_with_int64));
-
- // Types mismatch.
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(schema_struct_with_uint64, config_struct_with_int64),
- "signedness of both types must be the same");
- EXPECT_THROW_WITH_SUBSTRING(
- createWriter(schema_struct_with_uint64, config_struct_with_int64),
- "signedness of both types must be the same");
-
- // No schema for structured field is Ok.
- EXPECT_NO_THROW(createParser(New<TTableSchema>(), config_struct_with_int64));
- EXPECT_NO_THROW(createWriter(New<TTableSchema>(), config_struct_with_int64));
-
- auto schema_list_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"repeated", ListLogicalType(
- SimpleLogicalType(ESimpleLogicalValueType::Int64)
- )},
- });
-
- auto schema_list_optional_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"repeated", ListLogicalType(
- OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))
- )},
- });
-
- auto config_repeated_int64 = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("repeated")
- .Item("field_number").Value(1)
- .Item("repeated").Value(true)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- // OK.
- EXPECT_NO_THROW(createParser(schema_list_int64, config_repeated_int64));
- EXPECT_NO_THROW(createWriter(schema_list_int64, config_repeated_int64));
-
- // No schema for repeated field is Ok.
- EXPECT_NO_THROW(createParser(New<TTableSchema>(), config_repeated_int64));
- EXPECT_NO_THROW(createWriter(New<TTableSchema>(), config_repeated_int64));
-
- // List of optional is not allowed.
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(schema_list_optional_int64, config_repeated_int64),
- "unexpected logical metatype \"optional\"");
- EXPECT_THROW_WITH_SUBSTRING(
- createWriter(schema_list_optional_int64, config_repeated_int64),
- "unexpected logical metatype \"optional\"");
-
- auto schema_optional_list_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"repeated", OptionalLogicalType(
- ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))
- )},
- });
-
- // Optional list is OK.
- EXPECT_NO_THROW(createParser(schema_optional_list_int64, config_repeated_int64));
- EXPECT_NO_THROW(createWriter(schema_optional_list_int64, config_repeated_int64));
-
- auto schema_optional_optional_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"field", OptionalLogicalType(
- OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))
- )},
- });
-
- auto config_int64 = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("field")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- // Optional of optional is not allowed.
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(schema_optional_optional_int64, config_int64),
- "unexpected logical metatype \"optional\"");
- EXPECT_THROW_WITH_SUBSTRING(
- createWriter(schema_optional_optional_int64, config_int64),
- "unexpected logical metatype \"optional\"");
-
- auto schema_struct_with_both = New<TTableSchema>(std::vector<TColumnSchema>{
- {"struct", StructLogicalType({
- {"required_field", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"optional_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- })},
- });
-
- auto config_struct_with_required = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("struct")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("required_field")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- auto config_struct_with_optional = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("struct")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("optional_field")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- auto config_struct_with_unknown = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("struct")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("required_field")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("optional_field")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("unknown_field")
- .Item("field_number").Value(3)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- // Schema has more fields, non-optional field is missing in protobuf config.
- // Parser should fail.
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(schema_struct_with_both, config_struct_with_optional),
- "non-optional field \"required_field\" in schema is missing from protobuf config");
- // Writer feels OK.
- EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_optional));
-
- // Schema has more fields, optional field is missing in protobuf config.
- // It's OK for both the writer and the parser.
- EXPECT_NO_THROW(createParser(schema_struct_with_both, config_struct_with_required));
- EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_required));
-
- // Protobuf config has more fields, it is always OK.
- EXPECT_NO_THROW(createParser(schema_struct_with_both, config_struct_with_unknown));
- EXPECT_NO_THROW(createWriter(schema_struct_with_both, config_struct_with_unknown));
-
- auto schema_int64 = New<TTableSchema>(std::vector<TColumnSchema>{
- {"int64_field", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- });
-
- auto config_two_tables = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("int64_field")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("int64_field")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- EXPECT_NO_THROW(createWriter(schema_int64, config_two_tables));
- EXPECT_THROW_WITH_SUBSTRING(
- createSeveralTableWriter({schema_int64, schema_int64, schema_int64}, config_two_tables),
- "Number of schemas is greater than number of tables in protobuf config: 3 > 2");
-
- auto schema_variant_with_int = New<TTableSchema>(std::vector<TColumnSchema>{
- {"variant", VariantStructLogicalType({
- {"a", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- })},
- });
- auto schema_variant_with_optional_int = New<TTableSchema>(std::vector<TColumnSchema>{
- {"variant", VariantStructLogicalType({
- {"a", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- })},
- });
-
- auto config_with_oneof = BuildYsonNodeFluently()
- .BeginMap()
- .Item("tables")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("columns")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("variant")
- .Item("proto_type").Value("oneof")
- .Item("fields").BeginList()
- .Item()
- .BeginMap()
- .Item("name").Value("a")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap();
-
- // Oneof fields require schematized columns.
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(New<TTableSchema>(), config_with_oneof),
- "requires a corresponding schematized column");
- EXPECT_THROW_WITH_SUBSTRING(
- createWriter(New<TTableSchema>(), config_with_oneof),
- "requires a corresponding schematized column");
-
- EXPECT_THROW_WITH_SUBSTRING(
- createParser(schema_variant_with_optional_int, config_with_oneof),
- "Optional variant field \"variant.a\"");
- EXPECT_THROW_WITH_SUBSTRING(
- createWriter(schema_variant_with_optional_int, config_with_oneof),
- "Optional variant field \"variant.a\"");
- EXPECT_NO_THROW(createParser(schema_variant_with_int, config_with_oneof));
- EXPECT_NO_THROW(createWriter(schema_variant_with_int, config_with_oneof));
-}
-
-TEST(TProtobufFormat, MultipleOtherColumns)
-{
- auto nameTable = New<TNameTable>();
-
- TString data;
- TStringOutput resultStream(data);
-
- auto controlAttributesConfig = New<TControlAttributesConfig>();
- controlAttributesConfig->EnableTableIndex = true;
- controlAttributesConfig->EnableEndOfStream = true;
-
- auto protoWriter = CreateWriterForProtobuf(
- MakeProtobufFormatConfig({TOtherColumnsMessage::descriptor(), TOtherColumnsMessage::descriptor()}),
- std::vector<TTableSchemaPtr>(2, New<TTableSchema>()),
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- controlAttributesConfig,
- 0);
-
- protoWriter->Write(
- std::vector<TUnversionedRow>{
- NNamedValue::MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"field1", "foo"},
- }),
- NNamedValue::MakeRow(nameTable, {
- {TableIndexColumnName, 1},
- {"field2", "bar"},
- }),
- }
- );
- WaitFor(protoWriter->Close())
- .ThrowOnError();
-
- std::vector<TString> otherColumnsValue;
- auto parser = TLenvalParser(data);
- while (auto item = parser.Next()) {
- TOtherColumnsMessage message;
- bool parsed = message.ParseFromString(item->RowData);
- EXPECT_TRUE(parsed);
- otherColumnsValue.push_back(CanonizeYson(message.other_columns_field()));
- }
-
- EXPECT_EQ(
- otherColumnsValue,
- std::vector<TString>({
- CanonizeYson("{field1=foo}"),
- CanonizeYson("{field2=bar}"),
- }));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-using TProtobufFormatAllFieldsParameter = std::tuple<int, EProtoFormatType>;
-class TProtobufFormatAllFields
- : public ::testing::TestWithParam<TProtobufFormatAllFieldsParameter>
-{
-public:
- bool IsLegacyFormat() const
- {
- auto [rowCount, protoFormatType] = GetParam();
- return protoFormatType == EProtoFormatType::FileDescriptorLegacy;
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- Specification,
- TProtobufFormatAllFields,
- ::testing::Values(TProtobufFormatAllFieldsParameter{1, EProtoFormatType::Structured}));
-
-INSTANTIATE_TEST_SUITE_P(
- FileDescriptorLegacy,
- TProtobufFormatAllFields,
- ::testing::Values(TProtobufFormatAllFieldsParameter{1, EProtoFormatType::FileDescriptorLegacy}));
-
-INSTANTIATE_TEST_SUITE_P(
- FileDescriptor,
- TProtobufFormatAllFields,
- ::testing::Values(TProtobufFormatAllFieldsParameter{1, EProtoFormatType::FileDescriptor}));
-
-INSTANTIATE_TEST_SUITE_P(
- ManyRows,
- TProtobufFormatAllFields,
- ::testing::Values(TProtobufFormatAllFieldsParameter{50000, EProtoFormatType::Structured}));
-
-TEST_P(TProtobufFormatAllFields, Writer)
-{
- auto [rowCount, protoFormatType] = GetParam();
- auto config = CreateAllFieldsConfig(protoFormatType);
-
- auto nameTable = New<TNameTable>();
-
- auto doubleId = nameTable->RegisterName("Double");
- auto floatId = nameTable->RegisterName("Float");
-
- auto int64Id = nameTable->RegisterName("Int64");
- auto uint64Id = nameTable->RegisterName("UInt64");
- auto sint64Id = nameTable->RegisterName("SInt64");
- auto fixed64Id = nameTable->RegisterName("Fixed64");
- auto sfixed64Id = nameTable->RegisterName("SFixed64");
-
- auto int32Id = nameTable->RegisterName("Int32");
- auto uint32Id = nameTable->RegisterName("UInt32");
- auto sint32Id = nameTable->RegisterName("SInt32");
- auto fixed32Id = nameTable->RegisterName("Fixed32");
- auto sfixed32Id = nameTable->RegisterName("SFixed32");
-
- auto boolId = nameTable->RegisterName("Bool");
- auto stringId = nameTable->RegisterName("String");
- auto bytesId = nameTable->RegisterName("Bytes");
-
- auto enumId = nameTable->RegisterName("Enum");
-
- auto messageId = nameTable->RegisterName("Message");
-
- auto anyWithMapId = nameTable->RegisterName("AnyWithMap");
- auto anyWithInt64Id = nameTable->RegisterName("AnyWithInt64");
- auto anyWithStringId = nameTable->RegisterName("AnyWithString");
-
- auto otherInt64ColumnId = nameTable->RegisterName("OtherInt64Column");
- auto otherDoubleColumnId = nameTable->RegisterName("OtherDoubleColumn");
- auto otherStringColumnId = nameTable->RegisterName("OtherStringColumn");
- auto otherNullColumnId = nameTable->RegisterName("OtherNullColumn");
- auto otherBooleanColumnId = nameTable->RegisterName("OtherBooleanColumn");
- auto otherAnyColumnId = nameTable->RegisterName("OtherAnyColumn");
-
- auto tableIndexColumnId = nameTable->RegisterName(TableIndexColumnName);
- auto rowIndexColumnId = nameTable->RegisterName(RowIndexColumnName);
- auto rangeIndexColumnId = nameTable->RegisterName(RangeIndexColumnName);
-
- auto missintInt64Id = nameTable->RegisterName("MissingInt64");
-
- TString result;
- TStringOutput resultStream(result);
- auto writer = CreateWriterForProtobuf(
- config->Attributes(),
- {New<TTableSchema>()},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
-
- TEmbeddedMessage embeddedMessage;
- embeddedMessage.set_key("embedded_key");
- embeddedMessage.set_value("embedded_value");
- TString embeddedMessageBytes;
- ASSERT_TRUE(embeddedMessage.SerializeToString(&embeddedMessageBytes));
-
- auto mapNode = BuildYsonNodeFluently()
- .BeginMap()
- .Item("Key").Value("Value")
- .Item("Another")
- .BeginList()
- .Item().Value(1)
- .Item().Value("two")
- .EndList()
- .EndMap();
- auto ysonString = ConvertToYsonString(mapNode).ToString();
-
- TUnversionedRowBuilder builder;
- for (const auto& value : {
- MakeUnversionedDoubleValue(3.14159, doubleId),
- MakeUnversionedDoubleValue(2.71828, floatId),
-
- MakeUnversionedInt64Value(-1, int64Id),
- MakeUnversionedUint64Value(2, uint64Id),
- MakeUnversionedInt64Value(-3, sint64Id),
- MakeUnversionedUint64Value(4, fixed64Id),
- MakeUnversionedInt64Value(-5, sfixed64Id),
-
- MakeUnversionedInt64Value(-6, int32Id),
- MakeUnversionedUint64Value(7, uint32Id),
- MakeUnversionedInt64Value(-8, sint32Id),
- MakeUnversionedUint64Value(9, fixed32Id),
- MakeUnversionedInt64Value(-10, sfixed32Id),
-
- MakeUnversionedBooleanValue(true, boolId),
- MakeUnversionedStringValue("this_is_string", stringId),
- MakeUnversionedStringValue("this_is_bytes", bytesId),
-
- MakeUnversionedStringValue("Two", enumId),
-
- MakeUnversionedStringValue(embeddedMessageBytes, messageId),
-
- MakeUnversionedNullValue(missintInt64Id),
-
- MakeUnversionedInt64Value(12, tableIndexColumnId),
- MakeUnversionedInt64Value(42, rowIndexColumnId),
- MakeUnversionedInt64Value(333, rangeIndexColumnId),
- }) {
- builder.AddValue(value);
- }
-
- if (!IsLegacyFormat()) {
- builder.AddValue(MakeUnversionedAnyValue(ysonString, anyWithMapId));
- builder.AddValue(MakeUnversionedInt64Value(22, anyWithInt64Id));
- builder.AddValue(MakeUnversionedStringValue("some_string", anyWithStringId));
-
- builder.AddValue(MakeUnversionedInt64Value(-123, otherInt64ColumnId));
- builder.AddValue(MakeUnversionedDoubleValue(-123.456, otherDoubleColumnId));
- builder.AddValue(MakeUnversionedStringValue("some_string", otherStringColumnId));
- builder.AddValue(MakeUnversionedBooleanValue(true, otherBooleanColumnId));
- builder.AddValue(MakeUnversionedAnyValue(ysonString, otherAnyColumnId));
- builder.AddValue(MakeUnversionedNullValue(otherNullColumnId));
- }
-
- auto row = builder.GetRow();
- std::vector<TUnversionedRow> rows(rowCount, row);
- writer->Write(rows);
-
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput input(result);
- TLenvalParser lenvalParser(&input);
-
- for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
- auto entry = lenvalParser.Next();
- ASSERT_TRUE(entry);
-
- NYT::TMessage message;
- ASSERT_TRUE(message.ParseFromString(entry->RowData));
-
- EXPECT_DOUBLE_EQ(message.double_field(), 3.14159);
- EXPECT_FLOAT_EQ(message.float_field(), 2.71828);
- EXPECT_EQ(message.int64_field(), -1);
- EXPECT_EQ(message.uint64_field(), 2u);
- EXPECT_EQ(message.sint64_field(), -3);
- EXPECT_EQ(message.fixed64_field(), 4u);
- EXPECT_EQ(message.sfixed64_field(), -5);
-
- EXPECT_EQ(message.int32_field(), -6);
- EXPECT_EQ(message.uint32_field(), 7u);
- EXPECT_EQ(message.sint32_field(), -8);
- EXPECT_EQ(message.fixed32_field(), 9u);
- EXPECT_EQ(message.sfixed32_field(), -10);
-
- EXPECT_EQ(message.bool_field(), true);
- EXPECT_EQ(message.string_field(), "this_is_string");
- EXPECT_EQ(message.bytes_field(), "this_is_bytes");
-
- EXPECT_EQ(message.enum_field(), EEnum::Two);
-
- EXPECT_EQ(message.message_field().key(), "embedded_key");
- EXPECT_EQ(message.message_field().value(), "embedded_value");
-
- if (!IsLegacyFormat()) {
- EXPECT_TRUE(AreNodesEqual(ConvertToNode(TYsonString(message.any_field_with_map())), mapNode));
- EXPECT_TRUE(AreNodesEqual(
- ConvertToNode(TYsonString(message.any_field_with_int64())),
- BuildYsonNodeFluently().Value(22)));
- EXPECT_TRUE(AreNodesEqual(
- ConvertToNode(TYsonString(message.any_field_with_string())),
- BuildYsonNodeFluently().Value("some_string")));
-
- auto otherColumnsMap = ConvertToNode(TYsonString(message.other_columns_field()))->AsMap();
- EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<i64>("OtherInt64Column"), -123);
- EXPECT_DOUBLE_EQ(otherColumnsMap->GetChildValueOrThrow<double>("OtherDoubleColumn"), -123.456);
- EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<TString>("OtherStringColumn"), "some_string");
- EXPECT_EQ(otherColumnsMap->GetChildValueOrThrow<bool>("OtherBooleanColumn"), true);
- EXPECT_TRUE(AreNodesEqual(otherColumnsMap->GetChildOrThrow("OtherAnyColumn"), mapNode));
- EXPECT_EQ(otherColumnsMap->GetChildOrThrow("OtherNullColumn")->GetType(), ENodeType::Entity);
-
- auto keys = otherColumnsMap->GetKeys();
- std::sort(keys.begin(), keys.end());
- std::vector<TString> expectedKeys = {
- "OtherInt64Column",
- "OtherDoubleColumn",
- "OtherStringColumn",
- "OtherBooleanColumn",
- "OtherAnyColumn",
- "OtherNullColumn"};
- std::sort(expectedKeys.begin(), expectedKeys.end());
- EXPECT_EQ(expectedKeys, keys);
- }
- }
-
- ASSERT_FALSE(lenvalParser.Next());
-}
-
-TEST_P(TProtobufFormatAllFields, Parser)
-{
- auto [rowCount, protoFormatType] = GetParam();
-
- auto config = CreateAllFieldsConfig(protoFormatType);
-
- TMessage message;
- message.set_double_field(3.14159);
- message.set_float_field(2.71828);
-
- message.set_int64_field(-1);
- message.set_uint64_field(2);
- message.set_sint64_field(-3);
- message.set_fixed64_field(4);
- message.set_sfixed64_field(-5);
-
- message.set_int32_field(-6);
- message.set_uint32_field(7);
- message.set_sint32_field(-8);
- message.set_fixed32_field(9);
- message.set_sfixed32_field(-10);
-
- message.set_bool_field(true);
- message.set_string_field("this_is_string");
- message.set_bytes_field("this_is_bytes");
- message.set_enum_field(EEnum::Three);
-
- message.mutable_message_field()->set_key("embedded_key");
- message.mutable_message_field()->set_value("embedded_value");
-
- auto mapNode = BuildYsonNodeFluently()
- .BeginMap()
- .Item("Key").Value("Value")
- .Item("Another")
- .BeginList()
- .Item().Value(1)
- .Item().Value("two")
- .EndList()
- .EndMap();
-
- auto otherColumnsNode = BuildYsonNodeFluently()
- .BeginMap()
- .Item("OtherInt64Column").Value(-123)
- .Item("OtherDoubleColumn").Value(-123.456)
- .Item("OtherStringColumn").Value("some_string")
- .Item("OtherBooleanColumn").Value(true)
- .Item("OtherAnyColumn").Value(mapNode)
- .Item("OtherNullColumn").Entity()
- .EndMap();
-
- if (!IsLegacyFormat()) {
- message.set_any_field_with_map(ConvertToYsonString(mapNode).ToString());
- message.set_any_field_with_int64(BuildYsonStringFluently().Value(22).ToString());
- message.set_any_field_with_string(BuildYsonStringFluently().Value("some_string").ToString());
- message.set_other_columns_field(ConvertToYsonString(otherColumnsNode).ToString());
- }
-
- auto rowCollector = ParseRows(
- message,
- ConvertTo<TProtobufFormatConfigPtr>(config->Attributes().ToMap()),
- New<TTableSchema>(),
- rowCount);
-
- for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex) {
- int expectedSize = IsLegacyFormat() ? 17 : 26;
- ASSERT_EQ(static_cast<int>(rowCollector.GetRow(rowIndex).GetCount()), expectedSize);
-
- ASSERT_DOUBLE_EQ(GetDouble(rowCollector.GetRowValue(rowIndex, "Double")), 3.14159);
- ASSERT_NEAR(GetDouble(rowCollector.GetRowValue(rowIndex, "Float")), 2.71828, 1e-5);
-
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Int64")), -1);
- ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "UInt64")), 2u);
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SInt64")), -3);
- ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "Fixed64")), 4u);
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SFixed64")), -5);
-
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Int32")), -6);
- ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "UInt32")), 7u);
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SInt32")), -8);
- ASSERT_EQ(GetUint64(rowCollector.GetRowValue(rowIndex, "Fixed32")), 9u);
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "SFixed32")), -10);
-
- ASSERT_EQ(GetBoolean(rowCollector.GetRowValue(rowIndex, "Bool")), true);
- ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "String")), "this_is_string");
- ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "Bytes")), "this_is_bytes");
-
- if (IsLegacyFormat()) {
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "Enum")), 3);
- } else {
- ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "Enum")), "Three");
- }
-
- TEmbeddedMessage embeddedMessage;
- ASSERT_TRUE(embeddedMessage.ParseFromString(GetString(rowCollector.GetRowValue(rowIndex, "Message"))));
- ASSERT_EQ(embeddedMessage.key(), "embedded_key");
- ASSERT_EQ(embeddedMessage.value(), "embedded_value");
-
- if (!IsLegacyFormat()) {
- ASSERT_TRUE(AreNodesEqual(GetAny(rowCollector.GetRowValue(rowIndex, "AnyWithMap")), mapNode));
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "AnyWithInt64")), 22);
- ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "AnyWithString")), "some_string");
-
- ASSERT_EQ(GetInt64(rowCollector.GetRowValue(rowIndex, "OtherInt64Column")), -123);
- ASSERT_DOUBLE_EQ(GetDouble(rowCollector.GetRowValue(rowIndex, "OtherDoubleColumn")), -123.456);
- ASSERT_EQ(GetString(rowCollector.GetRowValue(rowIndex, "OtherStringColumn")), "some_string");
- ASSERT_EQ(GetBoolean(rowCollector.GetRowValue(rowIndex, "OtherBooleanColumn")), true);
- ASSERT_TRUE(AreNodesEqual(GetAny(rowCollector.GetRowValue(rowIndex, "OtherAnyColumn")), mapNode));
- ASSERT_EQ(rowCollector.GetRowValue(rowIndex, "OtherNullColumn").Type, EValueType::Null);
- }
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TProtobufFormatCompat
- : public ::testing::Test
-{
-public:
- static TTableSchemaPtr GetEarlySchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", OptionalLogicalType(VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- }))},
- });
- return schema;
- }
-
- static TTableSchemaPtr GetFirstMiddleSchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", OptionalLogicalType(VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))},
- {"b", OptionalLogicalType(StructLogicalType({
- {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))},
- });
- return schema;
- }
-
- static TTableSchemaPtr GetSecondMiddleSchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", OptionalLogicalType(VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))},
- {"b", OptionalLogicalType(StructLogicalType({
- {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- }))},
- });
- return schema;
- }
-
- static TTableSchemaPtr GetThirdMiddleSchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", OptionalLogicalType(VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))},
- {"b", OptionalLogicalType(StructLogicalType({
- {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"z", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- }))},
- });
- return schema;
- }
-
- static TTableSchemaPtr GetLateSchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", OptionalLogicalType(VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"f3", SimpleLogicalType(ESimpleLogicalValueType::Boolean)},
- }))},
- {"c", OptionalLogicalType(ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean)))},
- {"b", OptionalLogicalType(StructLogicalType({
- {"x", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"y", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"z", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- }))},
- });
- return schema;
- }
-
- static TProtobufFormatConfigPtr GetFirstMiddleConfig()
- {
- static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
- .Item().BeginMap()
- .Item("name").Value("a")
- .Item("field_number").Value(0)
- .Item("proto_type").Value("oneof")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("f1")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("b")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("x")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList().EndMap().EndList().EndMap());
- return config;
- }
-
- static TProtobufFormatConfigPtr GetSecondMiddleConfig()
- {
- static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
- .Item().BeginMap()
- .Item("name").Value("a")
- .Item("field_number").Value(0)
- .Item("proto_type").Value("oneof")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("f1")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("f2")
- .Item("field_number").Value(101)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("b")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("structured_message")
- .Item("fields")
- .BeginList()
- .Item().BeginMap()
- .Item("name").Value("x")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("string")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("y")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList().EndMap().EndList().EndMap());
- return config;
- }
-};
-
-template <typename TMessage>
-TMessage WriteRow(
- TUnversionedRow row,
- const TProtobufFormatConfigPtr& config,
- const TTableSchemaPtr& schema,
- const TNameTablePtr& nameTable)
-{
- TString result;
- TStringOutput resultStream(result);
-
- auto writer = CreateWriterForProtobuf(
- config,
- {schema},
- nameTable,
- CreateAsyncAdapter(&resultStream),
- true,
- New<TControlAttributesConfig>(),
- 0);
- writer->Write(std::vector<TUnversionedRow>{row});
- writer->Close().Get().ThrowOnError();
-
- TStringInput input(result);
- TLenvalParser lenvalParser(&input);
- auto entry = lenvalParser.Next();
- if (!entry) {
- THROW_ERROR_EXCEPTION("Unexpected end of stream in lenval parser");
- }
- TMessage message;
- if (!message.ParseFromString(entry->RowData)) {
- THROW_ERROR_EXCEPTION("Failed to parse message");
- }
- if (lenvalParser.Next()) {
- THROW_ERROR_EXCEPTION("Unexpected entry in lenval parser");
- }
- return message;
-}
-
-TEST_F(TProtobufFormatCompat, Write)
-{
- auto nameTable = TNameTable::FromSchema(*GetLateSchema());
- auto config = GetSecondMiddleConfig();
-
- auto writeRow = [&] (TUnversionedRow row, const TTableSchemaPtr& schema) {
- return WriteRow<NYT::TCompatMessage>(row, config, schema, nameTable);
- };
-
- {
- auto earlyRow = MakeRow(nameTable, {
- {"a", EValueType::Composite, "[0; -24]"}
- });
-
- SCOPED_TRACE("early");
- auto message = writeRow(earlyRow, GetEarlySchema());
- EXPECT_EQ(message.f1(), -24);
- EXPECT_FALSE(message.has_f2());
- EXPECT_EQ(message.has_b(), false);
- }
- {
- auto firstMiddleRow = MakeRow(nameTable, {
- {"a", EValueType::Composite, "[1; foobar]"},
- {"b", EValueType::Composite, "[foo]"},
- });
-
- SCOPED_TRACE("firstMiddle");
- auto message = writeRow(firstMiddleRow, GetFirstMiddleSchema());
- EXPECT_FALSE(message.has_f1());
- EXPECT_EQ(message.f2(), "foobar");
- EXPECT_EQ(message.b().x(), "foo");
- EXPECT_EQ(message.b().has_y(), false);
- }
- {
- auto secondMiddleRow = MakeRow(nameTable, {
- {"a", EValueType::Composite, "[1; foobar]"},
- {"b", EValueType::Composite, "[foo; bar]"},
- });
-
- SCOPED_TRACE("secondMiddle");
- auto message = writeRow(secondMiddleRow, GetSecondMiddleSchema());
- EXPECT_FALSE(message.has_f1());
- EXPECT_EQ(message.f2(), "foobar");
- EXPECT_EQ(message.b().x(), "foo");
- EXPECT_EQ(message.b().y(), "bar");
- }
- {
- auto thirdMiddleRow = MakeRow(nameTable, {
- {"a", EValueType::Composite, "[1; foobar]"},
- {"b", EValueType::Composite, "[foo; bar; spam]"},
- });
-
- SCOPED_TRACE("thirdMiddle");
- auto message = writeRow(thirdMiddleRow, GetThirdMiddleSchema());
- EXPECT_FALSE(message.has_f1());
- EXPECT_EQ(message.f2(), "foobar");
- EXPECT_EQ(message.b().x(), "foo");
- EXPECT_EQ(message.b().y(), "bar");
- }
- {
- auto lateRow = MakeRow(nameTable, {
- {"a", EValueType::Composite, "[2; %true]"},
- {"c", EValueType::Composite, "[%false; %true; %false]"},
- {"b", EValueType::Composite, "[foo; bar; spam]"},
- });
-
- SCOPED_TRACE("late");
- auto message = writeRow(lateRow, GetLateSchema());
- EXPECT_FALSE(message.has_f1());
- EXPECT_FALSE(message.has_f2());
- EXPECT_EQ(message.b().x(), "foo");
- EXPECT_EQ(message.b().y(), "bar");
- }
-}
-
-TEST_F(TProtobufFormatCompat, Parse)
-{
- auto config = GetSecondMiddleConfig();
-
- NYT::TCompatMessage message;
- message.set_f2("Sandiego");
- message.mutable_b()->set_x("foo");
- message.mutable_b()->set_y("bar");
-
- {
- SCOPED_TRACE("early");
- auto collector = ParseRows(message, config, GetEarlySchema());
- EXPECT_FALSE(collector.FindRowValue(0, "a"));
- EXPECT_FALSE(collector.GetNameTable()->FindId("b"));
- EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
- }
- {
- SCOPED_TRACE("firstMiddle");
- auto collector = ParseRows(message, config, GetFirstMiddleSchema());
- EXPECT_NODES_EQUAL(
- GetComposite(collector.GetRowValue(0, "a")),
- ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
- EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo]"))));
- EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
- }
- {
- SCOPED_TRACE("secondMiddle");
- auto collector = ParseRows(message, config, GetSecondMiddleSchema());
- EXPECT_NODES_EQUAL(
- GetComposite(collector.GetRowValue(0, "a")),
- ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
- EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar]"))));
- EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
- }
- {
- SCOPED_TRACE("thirdMiddle");
- auto collector = ParseRows(message, config, GetThirdMiddleSchema());
- EXPECT_NODES_EQUAL(
- GetComposite(collector.GetRowValue(0, "a")),
- ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
- EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar;#]"))));
- EXPECT_FALSE(collector.GetNameTable()->FindId("c"));
- }
- {
- SCOPED_TRACE("late");
- auto collector = ParseRows(message, config, GetLateSchema());
- EXPECT_NODES_EQUAL(
- GetComposite(collector.GetRowValue(0, "a")),
- ConvertToNode(TYsonString(TStringBuf("[1;Sandiego]"))));
- EXPECT_NODES_EQUAL(GetComposite(collector.GetRowValue(0, "b")), ConvertToNode(TYsonString(TStringBuf("[foo;bar;#]"))));
- EXPECT_TRUE(collector.GetNameTable()->FindId("c"));
- }
-}
-
-TEST_F(TProtobufFormatCompat, ParseWrong)
-{
- NYT::TCompatMessage message;
- message.set_f1(42);
- message.mutable_b()->set_x("foo");
- message.mutable_b()->set_y("bar");
-
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetFirstMiddleConfig(), GetFirstMiddleSchema()),
- "Unexpected field number 2");
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TProtobufFormatEnumCompat
- : public ::testing::Test
-{
-public:
- static TTableSchemaPtr CreateTableSchema()
- {
- static const auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"optional_enum", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"required_enum", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"packed_repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"inner", OptionalLogicalType(StructLogicalType({
- {"optional_enum", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"required_enum", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- {"packed_repeated_enum", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))},
- }))},
- });
- return schema;
- }
- static TProtobufFormatConfigPtr CreateProtobufFormatConfig()
- {
- static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap()
- .Item("enumerations").BeginMap()
- .Item("ECompatEnum")
- .BeginMap()
- .Item("One").Value(1)
- .Item("Two").Value(2)
- .Item("Three").Value(3)
- .EndMap()
- .EndMap()
- .Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
- .Item().BeginMap()
- .Item("name").Value("optional_enum")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("enum_string")
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("required_enum")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("enum_string")
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("repeated_enum")
- .Item("field_number").Value(3)
- .Item("proto_type").Value("enum_string")
- .Item("repeated").Value(true)
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("packed_repeated_enum")
- .Item("field_number").Value(4)
- .Item("proto_type").Value("enum_string")
- .Item("repeated").Value(true)
- .Item("packed").Value(true)
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("inner")
- .Item("field_number").Value(100)
- .Item("proto_type").Value("structured_message")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("optional_enum")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("enum_string")
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("required_enum")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("enum_string")
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("repeated_enum")
- .Item("field_number").Value(3)
- .Item("proto_type").Value("enum_string")
- .Item("repeated").Value(true)
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("packed_repeated_enum")
- .Item("field_number").Value(4)
- .Item("proto_type").Value("enum_string")
- .Item("repeated").Value(true)
- .Item("packed").Value(true)
- .Item("enum_writing_mode").Value("skip_unknown_values")
- .Item("enumeration_name").Value("ECompatEnum")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList().EndMap().EndList().EndMap());
- return config;
- }
-
-};
-
-TEST_F(TProtobufFormatEnumCompat, WriteCanSkipUnknownEnumValues)
-{
- auto schema = CreateTableSchema();
- auto config = CreateProtobufFormatConfig();
-
- auto nameTable = TNameTable::FromSchema(*schema);
-
- auto row = MakeRow(nameTable, {
- {"optional_enum", "MinusFortyTwo"},
- {"required_enum", "One"},
- {"repeated_enum", EValueType::Composite, "[MinusFortyTwo;One;MinusFortyTwo]"},
- {"packed_repeated_enum", EValueType::Composite, "[MinusFortyTwo;Two;MinusFortyTwo]"},
- {"inner", EValueType::Composite, "[MinusFortyTwo;Two;[MinusFortyTwo;Two];[One;MinusFortyTwo]]"},
- });
-
- auto collectRepeated = [](const auto& repeated) {
- std::vector<TEnumCompat::ECompatEnum> values;
- for (auto value : repeated) {
- values.push_back(static_cast<TEnumCompat::ECompatEnum>(value));
- }
- return values;
- };
-
- auto message = WriteRow<TEnumCompat>(row, config, schema, nameTable);
-
- EXPECT_FALSE(message.has_optional_enum());
- EXPECT_EQ(message.required_enum(), TEnumCompat::One);
- EXPECT_EQ(collectRepeated(message.repeated_enum()), std::vector{TEnumCompat::One});
- EXPECT_EQ(collectRepeated(message.packed_repeated_enum()), std::vector{TEnumCompat::Two});
-
- ASSERT_TRUE(message.has_inner());
- EXPECT_FALSE(message.inner().has_optional_enum());
- EXPECT_EQ(message.inner().required_enum(), TEnumCompat::Two);
- EXPECT_EQ(collectRepeated(message.inner().repeated_enum()), std::vector{TEnumCompat::Two});
- EXPECT_EQ(collectRepeated(message.inner().packed_repeated_enum()), std::vector{TEnumCompat::One});
-}
-
-TEST_F(TProtobufFormatEnumCompat, WriteDoesntSkipRequiredFields)
-{
- auto schema = CreateTableSchema();
- auto config = CreateProtobufFormatConfig();
-
- auto nameTable = TNameTable::FromSchema(*schema);
-
- {
- auto row = MakeRow(nameTable, {{"required_enum", "MinusFortyTwo"}});
- EXPECT_THROW_WITH_SUBSTRING(WriteRow<TEnumCompat>(row, config, schema, nameTable), "Invalid value for enum");
- }
- {
- auto row = MakeRow(nameTable, {{"inner", EValueType::Composite, "[#;MinusFortyTwo;#;#]"},});
- EXPECT_THROW_WITH_SUBSTRING(WriteRow<TEnumCompat>(row, config, schema, nameTable), "Invalid value for enum");
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TProtobufFormatRuntimeErrors
- : public ::testing::Test
-{
-public:
- static TTableSchemaPtr GetSchemaWithVariant(bool optional = false)
- {
- auto variantType = VariantStructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- });
- return New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", optional ? OptionalLogicalType(variantType) : variantType},
- });
- }
-
- static TTableSchemaPtr GetSchemaWithStruct(bool optional = false)
- {
- auto structType = StructLogicalType({
- {"f1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"f2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- });
- return New<TTableSchema>(std::vector<TColumnSchema>{
- {"a", optional ? OptionalLogicalType(structType) : structType},
- });
- }
-
- static TProtobufFormatConfigPtr GetConfigWithVariant()
- {
- static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
- .Item().BeginMap()
- .Item("name").Value("a")
- .Item("proto_type").Value("oneof")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("f1")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("f2")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList().EndMap().EndList().EndMap());
- return config;
- }
-
- static TProtobufFormatConfigPtr GetConfigWithStruct()
- {
- static const auto config = ConvertTo<TProtobufFormatConfigPtr>(BuildYsonNodeFluently()
- .BeginMap().Item("tables").BeginList().Item().BeginMap().Item("columns").BeginList()
- .Item().BeginMap()
- .Item("name").Value("a")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("structured_message")
- .Item("fields").BeginList()
- .Item().BeginMap()
- .Item("name").Value("f1")
- .Item("field_number").Value(1)
- .Item("proto_type").Value("int64")
- .EndMap()
- .Item().BeginMap()
- .Item("name").Value("f2")
- .Item("field_number").Value(2)
- .Item("proto_type").Value("string")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList().EndMap().EndList().EndMap());
- return config;
- }
-};
-
-TEST_F(TProtobufFormatRuntimeErrors, ParseVariant)
-{
- {
- SCOPED_TRACE("Optional variant, all missing");
- TMessageWithOneof message;
- auto collector = ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant(/* optional */ true));
- EXPECT_FALSE(collector.FindRowValue(0, "a"));
- }
- {
- SCOPED_TRACE("All missing");
- TMessageWithOneof message;
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant()),
- "required field \"<root>.a\" is missing");
- }
- {
- SCOPED_TRACE("two alternatives");
- TMessageWithStruct::TStruct message;
- message.set_f1(5);
- message.set_f2("boo");
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetConfigWithVariant(), GetSchemaWithVariant()),
- "multiple entries for oneof field \"<root>.a\"");
- }
-}
-
-TEST_F(TProtobufFormatRuntimeErrors, ParseStruct)
-{
- {
- SCOPED_TRACE("Optional submessage missing");
- TMessageWithStruct message;
- auto collector = ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct(/* optional */ true));
- EXPECT_FALSE(collector.FindRowValue(0, "a"));
- }
- {
- SCOPED_TRACE("Required submessage missing");
- TMessageWithStruct message;
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
- "required field \"<root>.a\" is missing");
- }
- {
- SCOPED_TRACE("All fields missing");
- TMessageWithStruct message;
- message.mutable_a();
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
- "required field \"<root>.a.f1\" is missing");
- }
- {
- SCOPED_TRACE("Second field missing");
- TMessageWithStruct message;
- message.mutable_a()->set_f1(17);
- EXPECT_THROW_WITH_SUBSTRING(
- ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct()),
- "required field \"<root>.a.f2\" is missing");
- }
- {
- SCOPED_TRACE("All present");
- TMessageWithStruct message;
- message.mutable_a()->set_f1(17);
- message.mutable_a()->set_f2("foobar");
- auto collector = ParseRows(message, GetConfigWithStruct(), GetSchemaWithStruct());
- EXPECT_NODES_EQUAL(
- GetComposite(collector.GetRowValue(0, "a")),
- ConvertToNode(TYsonString(TStringBuf("[17;foobar]"))));
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT
diff --git a/yt/yt/client/unittests/protobuf_format_ut.proto b/yt/yt/client/unittests/protobuf_format_ut.proto
deleted file mode 100644
index 06258de619..0000000000
--- a/yt/yt/client/unittests/protobuf_format_ut.proto
+++ /dev/null
@@ -1,255 +0,0 @@
-import "yt/yt_proto/yt/formats/extension.proto";
-
-package NYT.NProtobufFormatTest;
-
-enum EEnum
-{
- One = 1;
- Two = 2;
- Three = 3;
-
- MinusFortyTwo = -42;
-
- MinInt32 = -2147483648;
- MaxInt32 = 2147483647;
-}
-
-message TEmbeddedStruct {
- optional float float1 = 1;
- optional string string1 = 2;
-};
-
-message TEmbedded2Message {
- option (NYT.default_field_flags) = SERIALIZATION_YT;
- optional uint64 embedded2_num = 10;
- optional TEmbeddedStruct embedded2_struct = 17;
- repeated string embedded2_repeated = 42;
-};
-
-message TEmbedded1Message {
- option (NYT.default_field_flags) = SERIALIZATION_YT;
- optional TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED];
- oneof variant {
- string str_variant = 101;
- uint64 uint_variant = 102;
- }
- optional uint64 embedded_num = 10; // make intentional field_num collision!
- optional string embedded_extra_field = 11;
-};
-message TEmbeddingMessage {
- optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
- optional TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED];
- optional uint64 num = 12;
- optional string extra_field = 13;
-};
-
-message TEmbeddedMessage
-{
- optional string key = 1;
- optional string value = 2;
-}
-
-message TMessageWithStructuredEmbedded
-{
- option (NYT.default_field_flags) = SERIALIZATION_YT;
-
- message TFirstMessage
- {
- option (NYT.default_field_flags) = SERIALIZATION_YT;
-
- optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
- optional int64 int64_field = 2;
- repeated int64 repeated_int64_field = 3;
- optional TEmbeddedMessage message_field = 4;
- repeated TEmbeddedMessage repeated_message_field = 5;
- optional bytes any_int64_field = 6 [(NYT.flags) = ANY];
- optional bytes any_map_field = 7 [(NYT.flags) = ANY];
- optional int64 optional_int64_field = 8;
- repeated int64 another_repeated_int64_field = 9;
- repeated bytes repeated_optional_any_field = 10 [(NYT.flags) = ANY];
- repeated EEnum packed_repeated_enum_field = 11 [packed=true, (NYT.flags) = ENUM_STRING];
- repeated bool optional_repeated_bool_field = 12;
- oneof oneof_field {
- string oneof_string_field_1 = 101;
- string oneof_string_field = 102;
- TEmbeddedMessage oneof_message_field = 1000;
- }
- oneof optional_oneof_field {
- string optional_oneof_string_field_1 = 201;
- string optional_oneof_string_field = 202;
- TEmbeddedMessage optional_oneof_message_field = 2000;
- }
- map<int64, TEmbeddedMessage> map_field = 13 [(NYT.flags) = MAP_AS_DICT];
- }
-
- message TSecondMessage
- {
- optional int64 one = 2;
- optional int64 two = 500000000;
- optional int64 three = 100500;
- }
-
- optional TFirstMessage first = 1;
- optional TSecondMessage second = 2;
- repeated TEmbeddedMessage repeated_message_field = 3;
- repeated int64 repeated_int64_field = 4;
- optional int64 int64_any_field = 5 [(NYT.column_name) = "any_field"];
-
- optional int32 int32_field = 6 [(NYT.column_name) = "int64_field"];
- optional uint32 uint32_field = 7 [(NYT.column_name) = "uint64_field"];
- optional int64 int64_field = 8 [(NYT.column_name) = "int32_field"];
- optional uint64 uint64_field = 9 [(NYT.column_name) = "uint32_field"];
-
- optional EEnum enum_int_field = 10 [(NYT.flags) = ENUM_INT];
- optional EEnum enum_string_string_field = 11 [(NYT.flags) = ENUM_STRING];
- optional EEnum enum_string_int64_field = 12 [(NYT.flags) = ENUM_STRING];
-
-
- repeated int64 another_repeated_int64_field = 13;
-
- repeated bytes repeated_optional_any_field = 14 [(NYT.flags) = ANY];
-
- optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
-
- optional string utf8_field = 16;
-
- repeated int64 packed_repeated_int64_field = 17 [packed=true];
-
- repeated int64 optional_repeated_int64_field = 18;
-
- oneof oneof_field {
- string oneof_string_field_1 = 101;
- string oneof_string_field = 102;
- TEmbeddedMessage oneof_message_field = 1000;
- }
-
- oneof optional_oneof_field {
- string optional_oneof_string_field_1 = 201;
- string optional_oneof_string_field = 202;
- TEmbeddedMessage optional_oneof_message_field = 2000;
- }
-
- map<int64, TEmbeddedMessage> map_field = 19 [(NYT.flags) = MAP_AS_DICT];
-}
-
-message TSeveralTablesMessageFirst
-{
- option (NYT.default_field_flags) = SERIALIZATION_YT;
-
- message TEmbedded
- {
- optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
- optional int64 int64_field = 2;
- }
- optional TEmbedded embedded = 1;
- repeated int64 repeated_int64_field = 2;
- optional int64 int64_field = 3 [(NYT.column_name) = "any_field"];
-}
-
-message TSeveralTablesMessageSecond
-{
- optional EEnum enum_field = 1 [(NYT.flags) = ENUM_STRING];
- optional int64 int64_field = 2;
-}
-
-message TSeveralTablesMessageThird
-{
- optional string string_field = 1;
-}
-
-message TMessage
-{
- optional double double_field = 1 [(NYT.column_name) = "Double"];
- optional float float_field = 2 [(NYT.column_name) = "Float"];
-
- optional int64 int64_field = 3 [(NYT.column_name) = "Int64"];
- optional uint64 uint64_field = 4 [(NYT.column_name) = "UInt64"];
- optional sint64 sint64_field = 5 [(NYT.column_name) = "SInt64"];
- optional fixed64 fixed64_field = 6 [(NYT.column_name) = "Fixed64"];
- optional sfixed64 sfixed64_field = 7 [(NYT.column_name) = "SFixed64"];
-
- optional int32 int32_field = 8 [(NYT.column_name) = "Int32"];
- optional uint32 uint32_field = 9 [(NYT.column_name) = "UInt32"];
- optional sint32 sint32_field = 10 [(NYT.column_name) = "SInt32"];
- optional fixed32 fixed32_field = 11 [(NYT.column_name) = "Fixed32"];
- optional sfixed32 sfixed32_field = 12 [(NYT.column_name) = "SFixed32"];
-
- optional bool bool_field = 13 [(NYT.column_name) = "Bool"];
- optional string string_field = 14 [(NYT.column_name) = "String"];
- optional bytes bytes_field = 15 [(NYT.column_name) = "Bytes"];
-
- optional EEnum enum_field = 16 [(NYT.column_name) = "Enum", (NYT.flags) = ENUM_STRING];
- optional TEmbeddedMessage message_field = 17 [(NYT.column_name) = "Message"];
-
- optional bytes any_field_with_map = 18 [(NYT.column_name) = "AnyWithMap", (NYT.flags) = ANY];
- optional bytes any_field_with_int64 = 19 [(NYT.column_name) = "AnyWithInt64", (NYT.flags) = ANY];
- optional bytes any_field_with_string = 20 [(NYT.column_name) = "AnyWithString", (NYT.flags) = ANY];
- optional bytes other_columns_field = 21 [(NYT.flags) = OTHER_COLUMNS];
-
- optional int64 missing_int64_field = 22 [(NYT.column_name) = "MissingInt64"];
-}
-
-message TCompatMessage
-{
- message TEmbedded
- {
- optional string x = 1;
- optional string y = 2;
- }
-
- oneof a {
- int64 f1 = 1;
- string f2 = 101;
- }
- optional TEmbedded b = 2;
-}
-
-message TMessageWithOneof
-{
- oneof variant {
- int64 f1 = 1;
- string f2 = 2;
- }
-}
-
-message TMessageWithStruct
-{
- message TStruct
- {
- optional int64 f1 = 1;
- optional string f2 = 2;
- }
- optional TStruct a = 1;
-}
-
-message TOtherColumnsMessage
-{
- optional bytes other_columns_field = 1 [(NYT.flags) = OTHER_COLUMNS];
-}
-
-message TEnumCompat {
- option (NYT.default_field_flags) = SERIALIZATION_YT;
- option (NYT.default_field_flags) = ENUM_SKIP_UNKNOWN_VALUES;
-
- enum ECompatEnum {
- One = 1;
- Two = 2;
- Three = 3;
- }
-
-
- message TStruct
- {
- optional ECompatEnum optional_enum = 1;
- required ECompatEnum required_enum = 2;
- repeated ECompatEnum repeated_enum = 3;
- repeated ECompatEnum packed_repeated_enum = 4 [packed=true, (NYT.flags) = ENUM_STRING];
- }
-
- optional ECompatEnum optional_enum = 1;
- required ECompatEnum required_enum = 2;
- repeated ECompatEnum repeated_enum = 3;
- repeated ECompatEnum packed_repeated_enum = 4 [packed=true, (NYT.flags) = ENUM_STRING];
-
- optional TStruct inner = 100;
-}
diff --git a/yt/yt/client/unittests/row_helpers.cpp b/yt/yt/client/unittests/row_helpers.cpp
deleted file mode 100644
index d28628c5ab..0000000000
--- a/yt/yt/client/unittests/row_helpers.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "row_helpers.h"
-
-#include <yt/yt/core/yson/string.h>
-#include <yt/yt/core/ytree/convert.h>
-
-namespace NYT {
-
-using namespace NTableClient;
-
-////////////////////////////////////////////////////////////////////////////////
-
-static void EnsureTypesMatch(EValueType expected, EValueType actual)
-{
- if (expected != actual) {
- THROW_ERROR_EXCEPTION("Unexpected type of TUnversionedValue: expected %Qlv, actual %Qlv",
- expected,
- actual);
- }
-}
-
-i64 GetInt64(const TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Int64, row.Type);
- return row.Data.Int64;
-}
-
-ui64 GetUint64(const TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Uint64, row.Type);
- return row.Data.Uint64;
-}
-
-double GetDouble(const NTableClient::TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Double, row.Type);
- return row.Data.Double;
-}
-
-bool GetBoolean(const TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Boolean, row.Type);
- return row.Data.Boolean;
-}
-
-TString GetString(const TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::String, row.Type);
- return row.AsString();
-}
-
-NYTree::INodePtr GetAny(const NTableClient::TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Any, row.Type);
- return NYTree::ConvertToNode(NYson::TYsonString(row.AsString()));
-}
-
-NYTree::INodePtr GetComposite(const NTableClient::TUnversionedValue& row)
-{
- EnsureTypesMatch(EValueType::Composite, row.Type);
- return NYTree::ConvertToNode(NYson::TYsonString(row.AsString()));
-}
-
-bool IsNull(const NTableClient::TUnversionedValue& row)
-{
- return row.Type == EValueType::Null;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/client/unittests/row_helpers.h b/yt/yt/client/unittests/row_helpers.h
deleted file mode 100644
index 4a3fbd854f..0000000000
--- a/yt/yt/client/unittests/row_helpers.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#pragma once
-
-#include <yt/yt/client/table_client/unversioned_row.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/schema.h>
-#include <yt/yt/client/table_client/value_consumer.h>
-
-#include <vector>
-
-namespace NYT {
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TCollectingValueConsumer
- : public NTableClient::IValueConsumer
-{
-public:
- explicit TCollectingValueConsumer(NTableClient::TTableSchemaPtr schema = New<NTableClient::TTableSchema>())
- : Schema_(std::move(schema))
- { }
-
- explicit TCollectingValueConsumer(NTableClient::TNameTablePtr nameTable, NTableClient::TTableSchemaPtr schema = New<NTableClient::TTableSchema>())
- : Schema_(std::move(schema))
- , NameTable_(std::move(nameTable))
- { }
-
- const NTableClient::TNameTablePtr& GetNameTable() const override
- {
- return NameTable_;
- }
-
- const NTableClient::TTableSchemaPtr& GetSchema() const override
- {
- return Schema_;
- }
-
- bool GetAllowUnknownColumns() const override
- {
- return true;
- }
-
- void OnBeginRow() override
- { }
-
- void OnValue(const NTableClient::TUnversionedValue& value) override
- {
- Builder_.AddValue(value);
- }
-
- void OnEndRow() override
- {
- RowList_.emplace_back(Builder_.FinishRow());
- }
-
- NTableClient::TUnversionedRow GetRow(size_t rowIndex)
- {
- return RowList_.at(rowIndex);
- }
-
- std::optional<NTableClient::TUnversionedValue> FindRowValue(size_t rowIndex, TStringBuf columnName) const
- {
- NTableClient::TUnversionedRow row = RowList_.at(rowIndex);
- auto id = GetNameTable()->GetIdOrThrow(columnName);
-
- for (const auto& value : row) {
- if (value.Id == id) {
- return value;
- }
- }
- return std::nullopt;
- }
-
- NTableClient::TUnversionedValue GetRowValue(size_t rowIndex, TStringBuf columnName) const
- {
- auto row = FindRowValue(rowIndex, columnName);
- if (!row) {
- THROW_ERROR_EXCEPTION("Cannot find column %Qv", columnName);
- }
- return *row;
- }
-
- size_t Size() const
- {
- return RowList_.size();
- }
-
- const std::vector<NTableClient::TUnversionedOwningRow>& GetRowList() const {
- return RowList_;
- }
-
-private:
- const NTableClient::TTableSchemaPtr Schema_;
- const NTableClient::TNameTablePtr NameTable_ = New<NTableClient::TNameTable>();
- NTableClient::TUnversionedOwningRowBuilder Builder_;
- std::vector<NTableClient::TUnversionedOwningRow> RowList_;
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-i64 GetInt64(const NTableClient::TUnversionedValue& row);
-ui64 GetUint64(const NTableClient::TUnversionedValue& row);
-double GetDouble(const NTableClient::TUnversionedValue& row);
-bool GetBoolean(const NTableClient::TUnversionedValue& row);
-TString GetString(const NTableClient::TUnversionedValue& row);
-NYTree::INodePtr GetAny(const NTableClient::TUnversionedValue& row);
-NYTree::INodePtr GetComposite(const NTableClient::TUnversionedValue& row);
-bool IsNull(const NTableClient::TUnversionedValue& row);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/client/unittests/schema_ut.cpp b/yt/yt/client/unittests/schema_ut.cpp
index 3482b866b5..2b5f2d5e8a 100644
--- a/yt/yt/client/unittests/schema_ut.cpp
+++ b/yt/yt/client/unittests/schema_ut.cpp
@@ -1,4 +1,4 @@
-#include "logical_type_shortcuts.h"
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
#include "yt/yt/client/table_client/logical_type.h"
#include <yt/yt/core/test_framework/framework.h>
diff --git a/yt/yt/client/unittests/schemaful_dsv_parser_ut.cpp b/yt/yt/client/unittests/schemaful_dsv_parser_ut.cpp
deleted file mode 100644
index 000ae5f635..0000000000
--- a/yt/yt/client/unittests/schemaful_dsv_parser_ut.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/test_framework/yson_consumer_mock.h>
-
-#include <yt/yt/client/formats/schemaful_dsv_parser.h>
-
-#include <yt/yt/core/yson/null_consumer.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NYson;
-
-using ::testing::InSequence;
-using ::testing::StrictMock;
-using ::testing::NiceMock;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSchemafulDsvParserTest, Simple)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("5"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar("6"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("100"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar("max\tignat"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "5\t6\n"
- "100\tmax\\tignat\n";
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->Columns->push_back("b");
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSchemafulDsvParserTest, TableIndex)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginAttributes());
- EXPECT_CALL(Mock, OnKeyedItem("table_index"));
- EXPECT_CALL(Mock, OnInt64Scalar(1));
- EXPECT_CALL(Mock, OnEndAttributes());
- EXPECT_CALL(Mock, OnEntity());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("x"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginAttributes());
- EXPECT_CALL(Mock, OnKeyedItem("table_index"));
- EXPECT_CALL(Mock, OnInt64Scalar(0));
- EXPECT_CALL(Mock, OnEndAttributes());
- EXPECT_CALL(Mock, OnEntity());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("y"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("z"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "1\tx\n"
- "0\ty\n"
- "0\tz\n";
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->EnableTableIndex = true;
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-TEST(TSchemafulDsvParserTest, TooManyRows)
-{
- TString input = "5\t6\n";
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = {"a"};
-
- EXPECT_THROW({ ParseSchemafulDsv(input, GetNullYsonConsumer(), config); }, std::exception);
-}
-
-TEST(TSchemafulDsvParserTest, SpecialSymbols)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- auto value = TString("6\0", 2);
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("5\r"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar(value));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input("5\r\t6\0\n", 6);
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->Columns->push_back("b");
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-TEST(TSchemafulDsvParserTest, EnabledEscaping)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- auto value = TString("6\0", 2);
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("5\r\r"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar(value));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input("5\r\\r\t6\0\n", 8);
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->Columns->push_back("b");
- config->EnableEscaping = true;
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-TEST(TSchemafulDsvParserTest, DisabledEscaping)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- auto value = TString("6\0", 2);
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("5\r\\r"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar(value));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input("5\r\\r\t6\0\n", 8);
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->Columns->push_back("b");
- config->EnableEscaping = false;
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-TEST(TSchemafulDsvParserTest, ColumnsNamesHeader)
-{
- TString input("a\tb\n1\t2\n");
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = std::vector<TString>();
- config->Columns->push_back("a");
- config->Columns->push_back("b");
- config->EnableColumnNamesHeader = true;
-
- EXPECT_THROW(ParseSchemafulDsv(input, GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TSchemafulDsvParserTest, MissingValueModePrintSentinel)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- TString input = "x\t\tz\n";
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("x"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("c"));
- EXPECT_CALL(Mock, OnStringScalar("z"));
- EXPECT_CALL(Mock, OnEndMap());
-
- auto config = New<TSchemafulDsvFormatConfig>();
- config->Columns = {"a", "b", "c"};
- // By default missing_value_mode = fail and no sentinel values are used,
- // i. e. there is no way to represent YSON entity with this format.
-
- ParseSchemafulDsv(input, &Mock, config);
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("x"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnEntity());
- EXPECT_CALL(Mock, OnKeyedItem("c"));
- EXPECT_CALL(Mock, OnStringScalar("z"));
- EXPECT_CALL(Mock, OnEndMap());
-
- config->MissingValueMode = EMissingSchemafulDsvValueMode::PrintSentinel;
- // By default missing_value_sentinel = "".
-
- ParseSchemafulDsv(input, &Mock, config);
-
- input = "null\tNULL\t\n";
-
- config->MissingValueSentinel = "NULL";
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("null"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnEntity());
- EXPECT_CALL(Mock, OnKeyedItem("c"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnEndMap());
-
- ParseSchemafulDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/schemaful_dsv_writer_ut.cpp b/yt/yt/client/unittests/schemaful_dsv_writer_ut.cpp
deleted file mode 100644
index 90a3af0dcb..0000000000
--- a/yt/yt/client/unittests/schemaful_dsv_writer_ut.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-#include "format_writer_ut.h"
-
-#include <yt/yt/client/formats/schemaful_dsv_writer.h>
-#include <yt/yt/client/formats/format.h>
-
-#include <yt/yt/client/table_client/name_table.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-
-#include <limits>
-
-namespace NYT::NFormats {
-namespace {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NConcurrency;
-using namespace NTableClient;
-
-class TSchemalessWriterForSchemafulDsvTest
- : public ::testing::Test
-{
-protected:
- TNameTablePtr NameTable_;
- int KeyAId_;
- int KeyBId_;
- int KeyCId_;
- int KeyDId_;
- int TableIndexId_;
- int RangeIndexId_;
- int RowIndexId_;
- TSchemafulDsvFormatConfigPtr Config_;
-
- ISchemalessFormatWriterPtr Writer_;
-
- TStringStream OutputStream_;
-
- TSchemalessWriterForSchemafulDsvTest() {
- NameTable_ = New<TNameTable>();
- KeyAId_ = NameTable_->RegisterName("column_a");
- KeyBId_ = NameTable_->RegisterName("column_b");
- KeyCId_ = NameTable_->RegisterName("column_c");
- KeyDId_ = NameTable_->RegisterName("column_d");
- TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
- RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName);
- RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
-
- Config_ = New<TSchemafulDsvFormatConfig>();
- }
-
- void CreateStandardWriter() {
- auto controlAttributesConfig = New<TControlAttributesConfig>();
- controlAttributesConfig->EnableTableIndex = Config_->EnableTableIndex;
- Writer_ = CreateSchemalessWriterForSchemafulDsv(
- Config_,
- NameTable_,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
- false, // enableContextSaving
- controlAttributesConfig,
- 0 /* keyColumnCount */);
- }
-};
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, Simple)
-{
- Config_->Columns = {"column_b", "column_c", "column_a"};
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("value_a", KeyAId_));
- row1.AddValue(MakeUnversionedInt64Value(-42, KeyBId_));
- row1.AddValue(MakeUnversionedBooleanValue(true, KeyCId_));
- row1.AddValue(MakeUnversionedStringValue("garbage", KeyDId_));
-
- // Ignore system columns.
- row1.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
-
- TUnversionedRowBuilder row2;
- // The order is reversed.
- row2.AddValue(MakeUnversionedStringValue("value_c", KeyCId_));
- row2.AddValue(MakeUnversionedBooleanValue(false, KeyBId_));
- row2.AddValue(MakeUnversionedInt64Value(23, KeyAId_));
-
- std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString expectedOutput =
- "-42\ttrue\tvalue_a\n"
- "false\tvalue_c\t23\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-// This test shows the actual behavior of writer. It is OK to change it in the future. :)
-TEST_F(TSchemalessWriterForSchemafulDsvTest, TrickyDoubleRepresentations)
-{
- Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
- CreateStandardWriter();
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedDoubleValue(1.234567890123456, KeyAId_));
- row1.AddValue(MakeUnversionedDoubleValue(42, KeyBId_));
- row1.AddValue(MakeUnversionedDoubleValue(1e300, KeyCId_));
- row1.AddValue(MakeUnversionedDoubleValue(-1e-300, KeyDId_));
-
- std::vector<TUnversionedRow> rows = {row1.GetRow()};
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput = "1.234567890123456\t42.\t1e+300\t-1e-300\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, IntegralTypeRepresentations)
-{
- Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
- row1.AddValue(MakeUnversionedInt64Value(-1LL, KeyBId_));
- row1.AddValue(MakeUnversionedInt64Value(1LL, KeyCId_));
- row1.AddValue(MakeUnversionedInt64Value(99LL, KeyDId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedInt64Value(123LL, KeyAId_));
- row2.AddValue(MakeUnversionedInt64Value(-123LL, KeyBId_));
- row2.AddValue(MakeUnversionedInt64Value(1234LL, KeyCId_));
- row2.AddValue(MakeUnversionedInt64Value(-1234LL, KeyDId_));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedUint64Value(0ULL, KeyAId_));
- row3.AddValue(MakeUnversionedUint64Value(98ULL, KeyBId_));
- row3.AddValue(MakeUnversionedUint64Value(987ULL, KeyCId_));
- row3.AddValue(MakeUnversionedUint64Value(9876ULL, KeyDId_));
-
- TUnversionedRowBuilder row4;
- row4.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::max(), KeyAId_));
- row4.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::min(), KeyBId_));
- row4.AddValue(MakeUnversionedInt64Value(std::numeric_limits<i64>::min() + 1LL, KeyCId_));
- row4.AddValue(MakeUnversionedUint64Value(std::numeric_limits<ui64>::max(), KeyDId_));
-
- std::vector<TUnversionedRow> rows =
- {row1.GetRow(), row2.GetRow(), row3.GetRow(), row4.GetRow()};
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput =
- "0\t-1\t1\t99\n"
- "123\t-123\t1234\t-1234\n"
- "0\t98\t987\t9876\n"
- "9223372036854775807\t-9223372036854775808\t-9223372036854775807\t18446744073709551615\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, EmptyColumnList)
-{
- Config_->Columns = std::vector<TString>();
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
-
-
- std::vector<TUnversionedRow> rows = { row1.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput = "\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, MissingValueMode)
-{
- Config_->Columns = {"column_a", "column_b", "column_c"};
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("Value1A", KeyAId_));
- row1.AddValue(MakeUnversionedStringValue("Value1B", KeyBId_));
- row1.AddValue(MakeUnversionedStringValue("Value1C", KeyCId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("Value2A", KeyAId_));
- row2.AddValue(MakeUnversionedStringValue("Value2C", KeyCId_));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("Value3A", KeyAId_));
- row3.AddValue(MakeUnversionedStringValue("Value3B", KeyBId_));
- row3.AddValue(MakeUnversionedStringValue("Value3C", KeyCId_));
-
- std::vector<TUnversionedRow> rows =
- {row1.GetRow(), row2.GetRow(), row3.GetRow()};
-
- {
- Config_->MissingValueMode = EMissingSchemafulDsvValueMode::SkipRow;
- CreateStandardWriter();
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput =
- "Value1A\tValue1B\tValue1C\n"
- "Value3A\tValue3B\tValue3C\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
- OutputStream_.Clear();
- }
-
- {
- Config_->MissingValueMode = EMissingSchemafulDsvValueMode::Fail;
- CreateStandardWriter();
- EXPECT_EQ(false, Writer_->Write(rows));
- EXPECT_THROW(Writer_->Close()
- .Get()
- .ThrowOnError(), std::exception);
- OutputStream_.Clear();
- }
-
- {
- Config_->MissingValueMode = EMissingSchemafulDsvValueMode::PrintSentinel;
- Config_->MissingValueSentinel = "~";
- CreateStandardWriter();
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput =
- "Value1A\tValue1B\tValue1C\n"
- "Value2A\t~\tValue2C\n"
- "Value3A\tValue3B\tValue3C\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
- OutputStream_.Clear();
- }
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, NameTableExpansion)
-{
- Config_->Columns = {"Column1"};
- Config_->MissingValueMode = {EMissingSchemafulDsvValueMode::PrintSentinel};
- CreateStandardWriter();
- TestNameTableExpansion(Writer_, NameTable_);
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, TableIndex)
-{
- Config_->Columns = {"column_a", "column_b", "column_c", "column_d"};
- Config_->EnableTableIndex = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row0;
- row0.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
- row0.AddValue(MakeUnversionedInt64Value(1LL, KeyBId_));
- row0.AddValue(MakeUnversionedInt64Value(2LL, KeyCId_));
- row0.AddValue(MakeUnversionedInt64Value(3LL, KeyDId_));
-
- // It's necessary to specify a column corresponding to the table index
- // when enable_table_index = true.
- EXPECT_EQ(false, Writer_->Write(std::vector<TUnversionedRow>{row0.GetRow()}));
-
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedInt64Value(42LL, TableIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(0LL, KeyAId_));
- row1.AddValue(MakeUnversionedInt64Value(1LL, KeyBId_));
- row1.AddValue(MakeUnversionedInt64Value(2LL, KeyCId_));
- row1.AddValue(MakeUnversionedInt64Value(3LL, KeyDId_));
-
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedInt64Value(42LL, TableIndexId_));
- row2.AddValue(MakeUnversionedInt64Value(4LL, KeyAId_));
- row2.AddValue(MakeUnversionedInt64Value(5LL, KeyBId_));
- row2.AddValue(MakeUnversionedInt64Value(6LL, KeyCId_));
- row2.AddValue(MakeUnversionedInt64Value(7LL, KeyDId_));
-
- EXPECT_EQ(true, Writer_->Write(std::vector<TUnversionedRow>{row1.GetRow(), row2.GetRow()}));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedInt64Value(23LL, TableIndexId_));
- row3.AddValue(MakeUnversionedUint64Value(8LL, KeyAId_));
- row3.AddValue(MakeUnversionedUint64Value(9LL, KeyBId_));
- row3.AddValue(MakeUnversionedUint64Value(10LL, KeyCId_));
- row3.AddValue(MakeUnversionedUint64Value(11ULL, KeyDId_));
-
- EXPECT_EQ(true, Writer_->Write(std::vector<TUnversionedRow>{row3.GetRow()}));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
- TString expectedOutput =
- "42\t0\t1\t2\t3\n"
- "42\t4\t5\t6\t7\n"
- "23\t8\t9\t10\t11\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, ValidateDuplicateNames)
-{
- Config_->Columns = {"column_a", "column_b", "column_a"};
- Config_->EnableTableIndex = true;
- EXPECT_THROW(CreateStandardWriter(), TErrorException);
-}
-
-TEST_F(TSchemalessWriterForSchemafulDsvTest, ColumnsHeader)
-{
- Config_->Columns = {"column_b", "column_c", "column_a"};
- Config_->EnableColumnNamesHeader = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("value_a", KeyAId_));
- row1.AddValue(MakeUnversionedInt64Value(-42, KeyBId_));
- row1.AddValue(MakeUnversionedBooleanValue(true, KeyCId_));
- std::vector<TUnversionedRow> rows = {row1.GetRow()};
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString expectedOutput =
- "column_b\tcolumn_c\tcolumn_a\n"
- "-42\ttrue\tvalue_a\n";
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/skiff_format_ut.cpp b/yt/yt/client/unittests/skiff_format_ut.cpp
deleted file mode 100644
index 4878b7f673..0000000000
--- a/yt/yt/client/unittests/skiff_format_ut.cpp
+++ /dev/null
@@ -1,3006 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include "logical_type_shortcuts.h"
-#include "value_examples.h"
-#include "row_helpers.h"
-#include "yson_helpers.h"
-
-#include <yt/yt/client/formats/config.h>
-#include <yt/yt/client/formats/parser.h>
-#include <yt/yt/client/formats/skiff_parser.h>
-#include <yt/yt/client/formats/skiff_writer.h>
-#include <yt/yt/client/formats/format.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/validate_logical_type.h>
-
-#include <yt/yt/library/named_value/named_value.h>
-#include <yt/yt/library/skiff_ext/schema_match.h>
-
-#include <yt/yt/core/yson/string.h>
-#include <yt/yt/core/ytree/convert.h>
-#include <yt/yt/core/ytree/fluent.h>
-#include <yt/yt/core/ytree/tree_visitor.h>
-
-#include <library/cpp/skiff/skiff.h>
-#include <library/cpp/skiff/skiff_schema.h>
-
-#include <util/stream/null.h>
-#include <util/string/hex.h>
-
-namespace NYT {
-
-namespace {
-
-using namespace NFormats;
-using namespace NNamedValue;
-using namespace NSkiff;
-using namespace NSkiffExt;
-using namespace NTableClient;
-using namespace NYTree;
-using namespace NYson;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TString ConvertToSkiffSchemaShortDebugString(INodePtr node)
-{
- auto skiffFormatConfig = ConvertTo<TSkiffFormatConfigPtr>(std::move(node));
- auto skiffSchemas = ParseSkiffSchemas(skiffFormatConfig->SkiffSchemaRegistry, skiffFormatConfig->TableSkiffSchemas);
- TStringStream result;
- result << '{';
- for (const auto& schema : skiffSchemas) {
- result << GetShortDebugString(schema);
- result << ',';
- }
- result << '}';
- return result.Str();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TString ConvertToYsonTextStringStable(const INodePtr& node)
-{
- TStringStream out;
- TYsonWriter writer(&out, EYsonFormat::Text);
- VisitTree(node, &writer, true, TAttributeFilter());
- writer.Flush();
- return out.Str();
-}
-
-TTableSchemaPtr CreateSingleValueTableSchema(const TLogicalTypePtr& logicalType)
-{
- std::vector<TColumnSchema> columns;
- if (logicalType) {
- columns.emplace_back("value", logicalType);
-
- }
- auto strict = static_cast<bool>(logicalType);
- return New<TTableSchema>(columns, strict);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSkiffSchemaParse, TestAllowedTypes)
-{
- EXPECT_EQ(
- "{uint64,}",
-
- ConvertToSkiffSchemaShortDebugString(
- BuildYsonNodeFluently()
- .BeginMap()
- .Item("table_skiff_schemas")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("uint64")
- .EndMap()
- .EndList()
- .EndMap()));
-
- EXPECT_EQ(
- "{string32,}",
-
- ConvertToSkiffSchemaShortDebugString(
- BuildYsonNodeFluently()
- .BeginMap()
- .Item("table_skiff_schemas")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("string32")
- .EndMap()
- .EndList()
- .EndMap()));
-
- EXPECT_EQ(
- "{variant8<string32;int64;>,}",
-
- ConvertToSkiffSchemaShortDebugString(
- BuildYsonNodeFluently()
- .BeginMap()
- .Item("table_skiff_schemas")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("variant8")
- .Item("children")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("string32")
- .EndMap()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("int64")
- .EndMap()
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()));
-
- EXPECT_EQ(
- "{variant8<int64;string32;>,}",
-
- ConvertToSkiffSchemaShortDebugString(
- BuildYsonNodeFluently()
- .BeginMap()
- .Item("skiff_schema_registry")
- .BeginMap()
- .Item("item1")
- .BeginMap()
- .Item("wire_type")
- .Value("int64")
- .EndMap()
- .Item("item2")
- .BeginMap()
- .Item("wire_type")
- .Value("string32")
- .EndMap()
- .EndMap()
- .Item("table_skiff_schemas")
- .BeginList()
- .Item()
- .BeginMap()
- .Item("wire_type")
- .Value("variant8")
- .Item("children")
- .BeginList()
- .Item().Value("$item1")
- .Item().Value("$item2")
- .EndList()
- .EndMap()
- .EndList()
- .EndMap()));
-}
-
-TEST(TSkiffSchemaParse, TestRecursiveTypesAreDisallowed)
-{
- try {
- ConvertToSkiffSchemaShortDebugString(
- BuildYsonNodeFluently()
- .BeginMap()
- .Item("skiff_schema_registry")
- .BeginMap()
- .Item("item1")
- .BeginMap()
- .Item("wire_type")
- .Value("variant8")
- .Item("children")
- .BeginList()
- .Item().Value("$item1")
- .EndList()
- .EndMap()
- .EndMap()
- .Item("table_skiff_schemas")
- .BeginList()
- .Item().Value("$item1")
- .EndList()
- .EndMap());
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("recursive types are forbidden"));
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSkiffSchemaDescription, TestDescriptionDerivation)
-{
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Uint64),
- })->SetName("Bar"),
- });
-
- auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- EXPECT_EQ(std::ssize(tableDescriptionList), 1);
- EXPECT_EQ(tableDescriptionList[0].HasOtherColumns, false);
- EXPECT_EQ(tableDescriptionList[0].SparseFieldDescriptionList.empty(), true);
-
- auto denseFieldDescriptionList = tableDescriptionList[0].DenseFieldDescriptionList;
- EXPECT_EQ(std::ssize(denseFieldDescriptionList), 2);
-
- EXPECT_EQ(denseFieldDescriptionList[0].Name(), "Foo");
- EXPECT_EQ(denseFieldDescriptionList[0].ValidatedSimplify(), EWireType::Uint64);
-}
-
-TEST(TSkiffSchemaDescription, TestKeySwitchColumn)
-{
- {
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
- });
-
- auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- EXPECT_EQ(std::ssize(tableDescriptionList), 1);
- EXPECT_EQ(tableDescriptionList[0].KeySwitchFieldIndex, std::optional<size_t>(1));
- }
- {
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$key_switch"),
- });
-
- try {
- auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Column \"$key_switch\" has unexpected Skiff type"));
- }
- }
-}
-
-TEST(TSkiffSchemaDescription, TestDisallowEmptyNames)
-{
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName(""),
- });
-
- try {
- CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("must have a name"));
- }
-}
-
-TEST(TSkiffSchemaDescription, TestWrongRowType)
-{
- auto schema = CreateRepeatedVariant16Schema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
- });
-
- try {
- CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Invalid wire type for table row"));
- }
-}
-
-TEST(TSkiffSchemaDescription, TestOtherColumnsOk)
-{
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
- });
-
- auto tableDescriptionList = CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ASSERT_EQ(std::ssize(tableDescriptionList), 1);
- ASSERT_EQ(tableDescriptionList[0].HasOtherColumns, true);
-}
-
-TEST(TSkiffSchemaDescription, TestOtherColumnsWrongType)
-{
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$other_columns"),
- });
-
- try {
- CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Invalid wire type for column \"$other_columns\""));
- }
-}
-
-TEST(TSkiffSchemaDescription, TestOtherColumnsWrongPlace)
-{
- auto schema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Foo"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("$other_columns"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("Bar"),
- });
-
- try {
- CreateTableDescriptionList({schema}, RangeIndexColumnName, RowIndexColumnName);
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Invalid placement of special column \"$other_columns\""));
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-ISchemalessFormatWriterPtr CreateSkiffWriter(
- std::shared_ptr<TSkiffSchema> skiffSchema,
- TNameTablePtr nameTable,
- IOutputStream* outputStream,
- const std::vector<TTableSchemaPtr>& tableSchemaList,
- int keyColumnCount = 0,
- bool enableEndOfStream = false)
-{
- auto controlAttributesConfig = New<TControlAttributesConfig>();
- controlAttributesConfig->EnableKeySwitch = (keyColumnCount > 0);
- controlAttributesConfig->EnableEndOfStream = enableEndOfStream;
- return CreateWriterForSkiff(
- {std::move(skiffSchema)},
- std::move(nameTable),
- tableSchemaList,
- NConcurrency::CreateAsyncAdapter(outputStream),
- false,
- controlAttributesConfig,
- keyColumnCount);
-}
-
-TString TableToSkiff(
- const TLogicalTypePtr& logicalType,
- const std::shared_ptr<TSkiffSchema>& typeSchema,
- const TNamedValue::TValue& value)
-{
- auto schema = CreateSingleValueTableSchema(logicalType);
- auto skiffSchema = CreateTupleSchema({
- typeSchema->SetName("value")
- });
-
- auto nameTable = New<TNameTable>();
-
- TStringStream resultStream;
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {schema});
-
- writer->Write({
- MakeRow(nameTable, {
- {"value", value}
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- auto result = resultStream.Str();
- if (!TStringBuf(result).StartsWith(TString(2, '\0'))) {
- THROW_ERROR_EXCEPTION("Expected skiff value to start with \\x00\\x00, but prefix is %Qv",
- EscapeC(result.substr(0, 2)));
- }
-
- return result.substr(2);
-}
-
-TNamedValue::TValue SkiffToTable(
- const TLogicalTypePtr& logicalType,
- const std::shared_ptr<TSkiffSchema>& typeSchema,
- const TString& skiffValue)
-{
- auto schema = CreateSingleValueTableSchema(logicalType);
- auto skiffSchema = CreateTupleSchema({
- typeSchema->SetName("value")
- });
- auto nameTable = New<TNameTable>();
-
- TCollectingValueConsumer rowCollector(schema);
- auto parser = CreateParserForSkiff(skiffSchema, &rowCollector);
- parser->Read(TString(2, 0));
- parser->Read(skiffValue);
- parser->Finish();
-
- if (rowCollector.Size() != 1) {
- THROW_ERROR_EXCEPTION("Expected 1 row collected, actual %v",
- rowCollector.Size());
- }
- auto value = rowCollector.GetRowValue(0, "value");
- return TNamedValue::ExtractValue(value);
-}
-
-#define CHECK_BIDIRECTIONAL_CONVERSION(logicalTypeArg, skiffSchemaArg, tableValueArg, hexSkiffArg) \
- do { \
- try { \
- TLogicalTypePtr logicalType = (logicalTypeArg); \
- std::shared_ptr<TSkiffSchema> skiffSchema = (skiffSchemaArg); \
- TNamedValue::TValue tableValue = (tableValueArg); \
- TString hexSkiff = (hexSkiffArg); \
- auto nameTable = New<TNameTable>(); \
- auto actualSkiff = TableToSkiff(logicalType, skiffSchema, tableValue); \
- EXPECT_EQ(HexEncode(actualSkiff), hexSkiff); \
- auto actualValue = SkiffToTable(logicalType, skiffSchema, HexDecode(hexSkiff)); \
- EXPECT_EQ(actualValue, tableValue); \
- } catch (const std::exception& ex) { \
- ADD_FAILURE() << "unexpected exception: " << ex.what(); \
- } \
- } while (0)
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TestAllWireTypes(bool useSchema) // Writes two rows through the Skiff writer and checks the produced stream field by field.
-{
- auto skiffSchema = CreateTupleSchema({ // Seven plain columns followed by six variant8(Nothing, T) columns.
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
- CreateSimpleTypeSchema(EWireType::Double)->SetName("double_1"),
- CreateSimpleTypeSchema(EWireType::Double)->SetName("double_2"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("boolean"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
- CreateSimpleTypeSchema(EWireType::Nothing)->SetName("null"),
-
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName("opt_int64"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Uint64),
- })->SetName("opt_uint64"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Double),
- })->SetName("opt_double_1"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Double),
- })->SetName("opt_double_2"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Boolean),
- })->SetName("opt_boolean"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::String32),
- })->SetName("opt_string32"),
- });
- std::vector<TTableSchemaPtr> tableSchemas;
- if (useSchema) { // Typed table schema; double_2 and opt_double_2 are declared as Float.
- tableSchemas.push_back(New<TTableSchema>(std::vector{
- TColumnSchema("int64", EValueType::Int64),
- TColumnSchema("uint64", EValueType::Uint64),
- TColumnSchema("double_1", EValueType::Double),
- TColumnSchema("double_2", ESimpleLogicalValueType::Float),
- TColumnSchema("boolean", EValueType::Boolean),
- TColumnSchema("string32", EValueType::String),
- TColumnSchema("null", EValueType::Null),
- TColumnSchema("opt_int64", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))),
- TColumnSchema("opt_uint64", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Uint64))),
- TColumnSchema("opt_double_1", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Double))),
- TColumnSchema("opt_double_2", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Float))),
- TColumnSchema("opt_boolean", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Boolean))),
- TColumnSchema("opt_string32", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::String))),
- }));
- } else {
- tableSchemas.push_back(New<TTableSchema>()); // Default-constructed table schema.
- }
- auto nameTable = New<TNameTable>();
- TString result;
- {
- TStringOutput resultStream(result);
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, tableSchemas);
-
- writer->Write({ // Row 0: every optional column carries a value.
- MakeRow(nameTable, {
- {"int64", -1},
- {"uint64", 2u},
- {"double_1", 3.0},
- {"double_2", 3.0},
- {"boolean", true},
- {"string32", "four"},
- {"null", nullptr},
-
- {"opt_int64", -5},
- {"opt_uint64", 6u},
- {"opt_double_1", 7.0},
- {"opt_double_2", 7.0},
- {"opt_boolean", false},
- {"opt_string32", "eight"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Write({ // Row 1: every optional column is null.
- MakeRow(nameTable, {
- {"int64", -9},
- {"uint64", 10u},
- {"double_1", 11.0},
- {"double_2", 11.0},
- {"boolean", false},
- {"string32", "twelve"},
- {"null", nullptr},
-
- {"opt_int64", nullptr},
- {"opt_uint64", nullptr},
- {"opt_double_1", nullptr},
- {"opt_double_2", nullptr},
- {"opt_boolean", nullptr},
- {"opt_string32", nullptr},
- {TableIndexColumnName, 0},
- }).Get()
- });
-
- writer->Close()
- .Get()
- .ThrowOnError();
- }
-
- TStringInput resultInput(result);
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput); // Each row is read below as a variant16 tag followed by the tuple fields.
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), -1);
- ASSERT_EQ(checkedSkiffParser.ParseUint64(), 2u);
- // double_1
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 3.0);
- // double_2
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 3.0);
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "four"); // No parse call is issued for the "null" column (wire type Nothing).
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), -5);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseUint64(), 6u);
-
- // double_1
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 7.0);
-
- // double_2
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 7.0);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "eight");
-
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), -9);
- ASSERT_EQ(checkedSkiffParser.ParseUint64(), 10u);
- // double_1
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 11.0);
- // double_2
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 11.0);
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "twelve");
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0); // opt_int64: tag 0 is expected for a null value
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0); // opt_uint64
- // double_1
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- // double_2
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0); // opt_boolean
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0); // opt_string32
-
- // end
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-// Runs the shared wire-type scenario with a default-constructed table schema.
-TEST(TSkiffWriter, TestAllWireTypesNoSchema)
-{
-    constexpr bool useSchema = false;
-    TestAllWireTypes(useSchema);
-}
-
-// Runs the shared wire-type scenario with a fully typed table schema.
-TEST(TSkiffWriter, TestAllWireTypesWithSchema)
-{
-    constexpr bool useSchema = true;
-    TestAllWireTypes(useSchema);
-}
-
-// Parameterized fixture for the Yson32 wire type.
-// Each case is (logical type or null, table value, expected YSON text).
-class TSkiffYsonWireTypeP
-    : public ::testing::TestWithParam<std::tuple<
-        TLogicalTypePtr,
-        TNamedValue::TValue,
-        TString
-    >>
-{
-public:
-    // Builds the full case list: every primitive example with and without
-    // a logical type, plus a null value for every optional simple type.
-    static std::vector<ParamType> GetCases()
-    {
-        using namespace NLogicalTypeShortcuts;
-        std::vector<ParamType> cases;
-
-        for (const auto& sample : GetPrimitiveValueExamples()) {
-            // Same value twice: once typed, once with a null logical type.
-            cases.emplace_back(sample.LogicalType, sample.Value, sample.PrettyYson);
-            cases.emplace_back(nullptr, sample.Value, sample.PrettyYson);
-        }
-
-        for (const auto simpleType : TEnumTraits<ESimpleLogicalValueType>::GetDomainValues()) {
-            auto optionalType = OptionalLogicalType(SimpleLogicalType(simpleType));
-            // Optional<Null> is not v1 type, so V3-composite results are left out.
-            if (!IsV3Composite(optionalType)) {
-                cases.emplace_back(optionalType, nullptr, "#");
-            }
-        }
-        return cases;
-    }
-
-    static const std::vector<ParamType> Cases;
-};
-
-const std::vector<TSkiffYsonWireTypeP::ParamType> TSkiffYsonWireTypeP::Cases = TSkiffYsonWireTypeP::GetCases();
-
-INSTANTIATE_TEST_SUITE_P(
-    Cases,
-    TSkiffYsonWireTypeP,
-    ::testing::ValuesIn(TSkiffYsonWireTypeP::Cases));
-
-// Writes one value into a Yson32 Skiff column, checks the emitted YSON against
-// the expected text, then parses the stream back and compares the table value.
-TEST_P(TSkiffYsonWireTypeP, Test)
-{
-    const auto& [columnType, cellValue, wantedYson] = GetParam();
-
-    // A null logical type selects a default-constructed table schema.
-    TTableSchemaPtr tableSchema = columnType
-        ? New<TTableSchema>(std::vector<TColumnSchema>{
-            TColumnSchema("column", columnType),
-        })
-        : New<TTableSchema>();
-
-    auto rowSchema = CreateTupleSchema({
-        CreateSimpleTypeSchema(EWireType::Yson32)->SetName("column"),
-    });
-    auto nameTable = New<TNameTable>();
-
-    TStringStream encoded;
-    auto writer = CreateSkiffWriter(rowSchema, nameTable, &encoded, {tableSchema});
-    writer->Write({
-        MakeRow(nameTable, {{"column", cellValue}})
-    });
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-    // Writer side: inspect the raw stream.
-    auto encodedBytes = encoded.Str();
-    {
-        TMemoryInput input(encodedBytes);
-        TCheckedSkiffParser rawParser(CreateVariant16Schema({rowSchema}), &input);
-        EXPECT_EQ(rawParser.ParseVariant16Tag(), 0);
-        auto gotYson = rawParser.ParseYson32();
-        rawParser.ValidateFinished();
-
-        EXPECT_EQ(CanonizeYson(gotYson), CanonizeYson(wantedYson));
-    }
-
-    // Parser side: the same bytes must yield the original value.
-    TCollectingValueConsumer collector(nameTable);
-    auto tableParser = CreateParserForSkiff(rowSchema, tableSchema, &collector);
-    tableParser->Read(encoded.Str());
-    tableParser->Finish();
-    auto gotValue = collector.GetRowValue(0, "column");
-    EXPECT_EQ(gotValue, TNamedValue("column", cellValue).ToUnversionedValue(nameTable));
-}
-
-TEST(TSkiffWriter, TestYsonWireType) // Writes values of several types into Yson32 columns and checks the parsed YSON nodes.
-{
- auto skiffSchema = CreateTupleSchema({ // One plain Yson32 column and one variant8(Nothing, Yson32) column.
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson32"),
-
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Yson32),
- })->SetName("opt_yson32"),
- });
- auto nameTable = New<TNameTable>();
- TString result;
- {
- TStringOutput resultStream(result);
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()});
-
- // Row 0 (Null)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", nullptr},
- {"opt_yson32", nullptr},
- }).Get(),
- });
-
- // Row 1 (Int64)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", -5},
- {"opt_yson32", -6},
- }).Get(),
- });
-
- // Row 2 (Uint64)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", 42u},
- {"opt_yson32", 43u},
- }).Get(),
- });
-
- // Row 3 (Double)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", 2.7182818},
- {"opt_yson32", 3.1415926},
- }).Get(),
- });
-
- // Row 4 (Boolean)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", true},
- {"opt_yson32", false},
- }).Get(),
- });
-
- // Row 5 (String)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", "Yin"},
- {"opt_yson32", "Yang"},
- }).Get(),
- });
-
- // Row 6 (Any)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
-
- {"yson32", EValueType::Any, "{foo=bar;}"},
- {"opt_yson32", EValueType::Any, "{bar=baz;}"},
- }).Get(),
- });
-
- // Row 7 (both columns missing from the row)
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- }).Get(),
- });
-
- writer->Close()
- .Get()
- .ThrowOnError();
- }
-
- TStringInput resultInput(result);
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- auto parseYson = [] (TCheckedSkiffParser* parser) { // Reads one Yson32 field and converts it to a node.
- auto yson = TString{parser->ParseYson32()};
- return ConvertToNode(TYsonString(yson));
- };
-
- // Row 0 (Null)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->GetType(), ENodeType::Entity);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
-
- // Row 1 (Int64)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsInt64()->GetValue(), -5);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsInt64()->GetValue(), -6);
-
- // Row 2 (Uint64)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsUint64()->GetValue(), 42u);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsUint64()->GetValue(), 43u);
-
- // Row 3 (Double)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsDouble()->GetValue(), 2.7182818);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsDouble()->GetValue(), 3.1415926);
-
- // Row 4 (Boolean)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsBoolean()->GetValue(), true);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsBoolean()->GetValue(), false);
-
- // Row 5 (String)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsString()->GetValue(), "Yin");
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsString()->GetValue(), "Yang");
-
- // Row 6 (Any)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsMap()->GetChildOrThrow("foo")->AsString()->GetValue(), "bar");
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->AsMap()->GetChildOrThrow("bar")->AsString()->GetValue(), "baz");
-
- // Row 7 (missing values are expected to read back like the nulls of row 0)
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser)->GetType(), ENodeType::Entity);
-
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
-
- // end
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-class TSkiffFormatSmallIntP // Parameterized fixture: (Skiff value schema, logical type, table value, expected Skiff bytes).
-: public ::testing::TestWithParam<std::tuple<
- std::shared_ptr<TSkiffSchema>,
- TLogicalTypePtr,
- TNamedValue::TValue,
- TString
->>
-{
-public:
- static std::vector<ParamType> GetCases() // Builds the case list for small integer wire types plus Date and Datetime.
- {
- using namespace NLogicalTypeShortcuts;
-
- std::vector<ParamType> result;
-
- auto addSimpleCase = [&result] ( // Adds a case where the column holds the bare value.
- EWireType wireType,
- const TLogicalTypePtr& logicalType,
- auto value,
- TStringBuf skiffValue)
- {
- auto simpleSkiffSchema = CreateSimpleTypeSchema(wireType);
- auto simpleSkiffData = TString(2, 0) + skiffValue; // expected bytes: two zero bytes, then the encoded value
- result.emplace_back(simpleSkiffSchema, logicalType, value, simpleSkiffData);
- };
-
- auto addListCase = [&result] ( // Adds a case where the column holds a one-element list of the value.
- EWireType wireType,
- const TLogicalTypePtr& logicalType,
- auto value,
- TStringBuf skiffValue)
- {
- auto listSkiffSchema = CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(wireType)});
- auto listSkiffData = TString(3, 0) + skiffValue + TString(1, '\xff'); // expected bytes: three zero bytes, the encoded item, one 0xff byte
- auto listValue = TNamedValue::TValue{
- TNamedValue::TComposite{
- BuildYsonStringFluently()
- .BeginList()
- .Item().Value(value)
- .EndList().ToString()
- }
- };
- result.emplace_back(listSkiffSchema, List(logicalType), listValue, listSkiffData);
- };
-
- auto addSimpleAndListCases = [&] ( // Adds both the bare and the list variant for one (wire type, logical type) pair.
- EWireType wireType,
- const TLogicalTypePtr& logicalType,
- auto value,
- TStringBuf skiffValue)
- {
- addSimpleCase(wireType, logicalType, value, skiffValue);
- addListCase(wireType, logicalType, value, skiffValue);
- };
-
- auto addMultiCase = [&] (EWireType wireType, auto value, TStringBuf skiffValue) { // Fans one value out over every integer logical type whose range contains it.
- auto add = [&] (const TLogicalTypePtr& logicalType) {
- addSimpleAndListCases(wireType, logicalType, value, skiffValue);
- };
- addSimpleCase(wireType, Yson(), value, skiffValue); // the Yson() logical type gets the bare case only
-
- using T = std::decay_t<decltype(value)>;
- static_assert(std::is_integral_v<T>);
- if constexpr (std::is_signed_v<T>) { // signedness of the literal selects the signed or unsigned type family
- if (std::numeric_limits<i8>::min() <= value && value <= std::numeric_limits<i8>::max()) {
- add(Int8());
- }
- if (std::numeric_limits<i16>::min() <= value && value <= std::numeric_limits<i16>::max()) {
- add(Int16());
- }
- if (std::numeric_limits<i32>::min() <= value && value <= std::numeric_limits<i32>::max()) {
- add(Int32());
- }
- add(Int64());
- } else {
- if (value <= std::numeric_limits<ui8>::max()) {
- add(Uint8());
- }
- if (value <= std::numeric_limits<ui16>::max()) {
- add(Uint16());
- }
- if (value <= std::numeric_limits<ui32>::max()) {
- add(Uint32());
- }
- add(Uint64());
- }
- };
- addMultiCase(EWireType::Int8, 0, TStringBuf("\x00"sv));
- addMultiCase(EWireType::Int8, 42, TStringBuf("*"));
- addMultiCase(EWireType::Int8, -42, TStringBuf("\xd6"sv));
- addMultiCase(EWireType::Int8, 127, TStringBuf("\x7f"sv));
- addMultiCase(EWireType::Int8, -128, TStringBuf("\x80"sv));
-
- addMultiCase(EWireType::Int16, 0, TStringBuf("\x00\x00"sv));
- addMultiCase(EWireType::Int16, 42, TStringBuf("\x2a\x00"sv));
- addMultiCase(EWireType::Int16, -42, TStringBuf("\xd6\xff"sv));
- addMultiCase(EWireType::Int16, 0x7fff, TStringBuf("\xff\x7f"sv));
- addMultiCase(EWireType::Int16, -0x8000, TStringBuf("\x00\x80"sv));
-
- addMultiCase(EWireType::Int32, 0, TStringBuf("\x00\x00\x00\x00"sv));
- addMultiCase(EWireType::Int32, 42, TStringBuf("\x2a\x00\x00\x00"sv));
- addMultiCase(EWireType::Int32, -42, TStringBuf("\xd6\xff\xff\xff"sv));
- addMultiCase(EWireType::Int32, 0x7fffffff, TStringBuf("\xff\xff\xff\x7f"sv));
- addMultiCase(EWireType::Int32, -0x80000000l, TStringBuf("\x00\x00\x00\x80"sv)); // NOTE(review): the `l` suffix yields a signed value only where long is wider than 32 bits - confirm target platforms
-
- addMultiCase(EWireType::Uint8, 0ull, TStringBuf("\x00"sv));
- addMultiCase(EWireType::Uint8, 42ull, TStringBuf("*"));
- addMultiCase(EWireType::Uint8, 255ull, TStringBuf("\xff"sv));
-
- addMultiCase(EWireType::Uint16, 0ull, TStringBuf("\x00\x00"sv));
- addMultiCase(EWireType::Uint16, 42ull, TStringBuf("\x2a\x00"sv));
- addMultiCase(EWireType::Uint16, 0xFFFFull, TStringBuf("\xff\xff"sv));
-
- addMultiCase(EWireType::Uint32, 0ull, TStringBuf("\x00\x00\x00\x00"sv));
- addMultiCase(EWireType::Uint32, 42ull, TStringBuf("\x2a\x00\x00\x00"sv));
- addMultiCase(EWireType::Uint32, 0xFFFFFFFFull, TStringBuf("\xff\xff\xff\xff"sv));
-
- addSimpleAndListCases(EWireType::Uint16, Date(), 0ull, TStringBuf("\x00\x00"sv));
- addSimpleAndListCases(EWireType::Uint16, Date(), 42ull, TStringBuf("\x2a\x00"sv));
- addSimpleAndListCases(EWireType::Uint16, Date(), DateUpperBound - 1, TStringBuf("\x08\xc2"sv));
-
- addSimpleAndListCases(EWireType::Uint32, Datetime(), 0ull, TStringBuf("\x00\x00\x00\x00"sv));
- addSimpleAndListCases(EWireType::Uint32, Datetime(), 42ull, TStringBuf("\x2a\x00\x00\x00"sv));
- addSimpleAndListCases(EWireType::Uint32, Datetime(), DatetimeUpperBound - 1, TStringBuf("\x7f\xdd\xce\xff"sv));
-
- return result;
- }
-
- static const std::vector<ParamType> Cases;
-};
-
-const std::vector<TSkiffFormatSmallIntP::ParamType> TSkiffFormatSmallIntP::Cases = TSkiffFormatSmallIntP::GetCases();
-
-INSTANTIATE_TEST_SUITE_P(
- Cases,
- TSkiffFormatSmallIntP,
- ::testing::ValuesIn(TSkiffFormatSmallIntP::Cases));
-
-// Writes a single-column row through the Skiff writer and compares the bytes
-// with the expected encoding, then parses the expected bytes back and checks
-// that the original table value is recovered.
-TEST_P(TSkiffFormatSmallIntP, Test)
-{
-    const auto& [skiffValueSchema, logicalType, value, expectedSkiffData] = GetParam();
-
-    const auto nameTable = New<TNameTable>();
-
-    TStringStream actualSkiffData;
-    auto skiffTableSchema = CreateTupleSchema({
-        skiffValueSchema->SetName("column")
-    });
-    auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
-        TColumnSchema("column", logicalType),
-    });
-    auto writer = CreateSkiffWriter(skiffTableSchema, nameTable, &actualSkiffData, {tableSchema});
-    writer->Write({
-        MakeRow(nameTable, {{"column", value}})
-    });
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-    EXPECT_EQ(actualSkiffData.Str(), expectedSkiffData);
-
-    TCollectingValueConsumer rowCollector(nameTable);
-    auto parser = CreateParserForSkiff(skiffTableSchema, tableSchema, &rowCollector);
-    parser->Read(expectedSkiffData);
-    parser->Finish();
-    auto actualValue = rowCollector.GetRowValue(0, "column");
-
-    // The expected value is built under the same column name the row was
-    // written and read with, matching the sibling Yson32 test.
-    EXPECT_EQ(actualValue, TNamedValue("column", value).ToUnversionedValue(nameTable));
-}
-
-// Values just outside the range of a narrow integer wire type must make the
-// writer fail with an "out of range" error.
-TEST(TSkiffWriter, TestBadSmallIntegers)
-{
-    using namespace NLogicalTypeShortcuts;
-
-    // Writes one value into a single "column" of the given wire and logical
-    // types and returns the produced bytes.
-    auto encodeColumn = [] (
-        std::shared_ptr<TSkiffSchema>&& wireSchema,
-        TLogicalTypePtr columnType,
-        TNamedValue::TValue cell)
-    {
-        TStringStream out;
-        auto rowSchema = CreateTupleSchema({
-            wireSchema->SetName("column")
-        });
-        auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{
-            TColumnSchema("column", std::move(columnType)),
-        });
-        auto names = New<TNameTable>();
-        auto writer = CreateSkiffWriter(rowSchema, names, &out, {tableSchema});
-        writer->Write({
-            MakeRow(names, {{"column", std::move(cell)}})
-        });
-        writer->Close()
-            .Get()
-            .ThrowOnError();
-        return out.Str();
-    };
-
-    // Int8: one above the maximum and one below the minimum.
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int8), Int64(), 128),
-        "is out of range for possible values");
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int8), Int64(), -129),
-        "is out of range for possible values");
-
-    // Int16.
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int16), Int64(), 0x8000),
-        "is out of range for possible values");
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int16), Int64(), -0x8001),
-        "is out of range for possible values");
-
-    // Int32.
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int32), Int64(), 0x80000000ll),
-        "is out of range for possible values");
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Int32), Int64(), -0x80000001ll),
-        "is out of range for possible values");
-
-    // Unsigned types only have an upper bound to violate.
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Uint8), Uint64(), 256ull),
-        "is out of range for possible values");
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Uint16), Uint64(), 0x1FFFFull),
-        "is out of range for possible values");
-
-    EXPECT_THROW_WITH_SUBSTRING(
-        encodeColumn(CreateSimpleTypeSchema(EWireType::Uint32), Uint64(), 0x100000000ull),
-        "is out of range for possible values");
-}
-
-// Parameterized fixture for UUID columns.
-// Each case is (name table, table schema, Skiff schema, rows, expected Skiff bytes).
-class TSkiffFormatUuidTestP
-    : public ::testing::TestWithParam<std::tuple<
-        TNameTablePtr,
-        TTableSchemaPtr,
-        std::shared_ptr<TSkiffSchema>,
-        std::vector<TUnversionedOwningRow>,
-        TString
-    >>
-{
-public:
-    // Builds all combinations of {required, optional} table column,
-    // {required, optional} Skiff column and {Uint128, String32} wire type.
-    static std::vector<ParamType> GetCases()
-    {
-        using namespace NLogicalTypeShortcuts;
-
-        auto nameTable = New<TNameTable>();
-        const auto stringUuidValue = TStringBuf("\xee\x1f\x37\x70" "\xb9\x93\x64\xb5" "\xe4\xdf\xe9\x03" "\x67\x5c\x30\x62");
-        const auto uint128UuidValue = TStringBuf("\x62\x30\x5c\x67" "\x03\xe9\xdf\xe4" "\xb5\x64\x93\xb9" "\x70\x37\x1f\xee");
-
-        const auto requiredTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{TColumnSchema("uuid", Uuid())});
-        const auto optionalTableSchema = New<TTableSchema>(std::vector<TColumnSchema>{TColumnSchema("uuid", Optional(Uuid()))});
-
-        const auto optionalUint128SkiffSchema = CreateTupleSchema({
-            CreateVariant8Schema({
-                CreateSimpleTypeSchema(EWireType::Nothing),
-                CreateSimpleTypeSchema(EWireType::Uint128),
-            })->SetName("uuid"),
-        });
-
-        const auto requiredUint128SkiffSchema = CreateTupleSchema({
-            CreateSimpleTypeSchema(EWireType::Uint128)->SetName("uuid"),
-        });
-
-        const auto optionalStringSkiffSchema = CreateTupleSchema({
-            CreateVariant8Schema({
-                CreateSimpleTypeSchema(EWireType::Nothing),
-                CreateSimpleTypeSchema(EWireType::String32),
-            })->SetName("uuid"),
-        });
-
-        const auto requiredStringSkiffSchema = CreateTupleSchema({
-            CreateSimpleTypeSchema(EWireType::String32)->SetName("uuid"),
-        });
-
-        // Expected encodings. Every row starts with two zero bytes; an optional
-        // Skiff column adds a "\1" byte in front of the payload.
-        const TString rowPrefix(2, '\0');
-        const TString uuidLen = TString(TStringBuf("\x10\x00\x00\x00"sv));
-        const TString plainUint128 = rowPrefix + uint128UuidValue;
-        const TString taggedUint128 = rowPrefix + "\1" + uint128UuidValue;
-        const TString plainString = rowPrefix + uuidLen + stringUuidValue;
-        const TString taggedString = rowPrefix + "\1" + uuidLen + stringUuidValue;
-
-        std::vector<ParamType> cases;
-
-        // Every case writes the same single row; only schemas and expected bytes vary.
-        auto addCase = [&] (const auto& tableSchema, const auto& skiffSchema, const TString& expectedSkiff) {
-            cases.emplace_back(
-                nameTable,
-                tableSchema,
-                skiffSchema,
-                std::vector<TUnversionedOwningRow>{
-                    MakeRow(nameTable, {{"uuid", stringUuidValue}}),
-                },
-                expectedSkiff);
-        };
-
-        addCase(requiredTableSchema, requiredUint128SkiffSchema, plainUint128);
-        addCase(optionalTableSchema, requiredUint128SkiffSchema, plainUint128);
-        addCase(requiredTableSchema, optionalUint128SkiffSchema, taggedUint128);
-        addCase(optionalTableSchema, optionalUint128SkiffSchema, taggedUint128);
-
-        addCase(requiredTableSchema, requiredStringSkiffSchema, plainString);
-        addCase(optionalTableSchema, requiredStringSkiffSchema, plainString);
-        addCase(requiredTableSchema, optionalStringSkiffSchema, taggedString);
-        addCase(optionalTableSchema, optionalStringSkiffSchema, taggedString);
-
-        return cases;
-    }
-
-    static const std::vector<ParamType> Cases;
-};
-
-const std::vector<TSkiffFormatUuidTestP::ParamType> TSkiffFormatUuidTestP::Cases = TSkiffFormatUuidTestP::GetCases();
-
-INSTANTIATE_TEST_SUITE_P(
-    Cases,
-    TSkiffFormatUuidTestP,
-    ::testing::ValuesIn(TSkiffFormatUuidTestP::Cases));
-
-// Writes the case rows, compares the bytes with the expected encoding, then
-// parses the produced bytes back and compares the collected rows with the input.
-TEST_P(TSkiffFormatUuidTestP, Test)
-{
-    const auto& [nameTable, tableSchema, skiffSchema, rows, skiffString] = GetParam();
-
-    TStringStream encoded;
-
-    // The writer takes a range of non-owning rows.
-    std::vector<TUnversionedRow> rowViews;
-    for (const auto& owningRow : rows) {
-        rowViews.emplace_back(owningRow);
-    }
-
-    auto writer = CreateSkiffWriter(skiffSchema, nameTable, &encoded, {tableSchema});
-    writer->Write(MakeRange(rowViews));
-    writer->Close().Get().ThrowOnError();
-    ASSERT_EQ(encoded.Str(), skiffString);
-
-    TCollectingValueConsumer collector(nameTable);
-    auto parser = CreateParserForSkiff(skiffSchema, tableSchema, &collector);
-    parser->Read(encoded.Str());
-    parser->Finish();
-    ASSERT_EQ(collector.GetRowList(), rows);
-}
-
-// A null value in an optional UUID column cannot be written into a required
-// Uint128 Skiff column: closing the writer must fail with "Unexpected type".
-TEST(TSkiffFormatUuidTest, TestError)
-{
-    using namespace NLogicalTypeShortcuts;
-
-    auto nameTable = New<TNameTable>();
-    auto tableSchema = New<TTableSchema>(
-        std::vector<TColumnSchema>{TColumnSchema("uuid", Optional(Uuid()))});
-
-    auto requiredUint128Schema = CreateTupleSchema({
-        CreateSimpleTypeSchema(EWireType::Uint128)->SetName("uuid"),
-    });
-
-    TStringStream output;
-    auto writer = CreateSkiffWriter(requiredUint128Schema, nameTable, &output, {tableSchema});
-    writer->Write({
-        MakeRow(nameTable, {{"uuid", nullptr}}),
-    });
-    EXPECT_THROW_WITH_SUBSTRING(writer->Close().Get().ThrowOnError(),
-        "Unexpected type");
-}
-
-// Fixture parameterized by a singular logical type (Null or Void).
-class TSkiffWriterSingular
-    : public ::testing::TestWithParam<ESimpleLogicalValueType>
-{ };
-
-INSTANTIATE_TEST_SUITE_P(
-    Singular,
-    TSkiffWriterSingular,
-    ::testing::Values(ESimpleLogicalValueType::Null, ESimpleLogicalValueType::Void));
-
-// Optional<singular> column: a null row must produce variant tag 0 and the
-// composite "[#]" row must produce variant tag 1.
-TEST_P(TSkiffWriterSingular, TestOptionalSingular)
-{
-    const auto singular = GetParam();
-
-    auto rowSchema = CreateTupleSchema({
-        CreateVariant8Schema({
-            CreateSimpleTypeSchema(EWireType::Nothing),
-            CreateSimpleTypeSchema(EWireType::Nothing),
-        })->SetName("opt_null"),
-    });
-
-    auto nameTable = New<TNameTable>();
-    const std::vector<TTableSchemaPtr> schemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema("opt_null", OptionalLogicalType(SimpleLogicalType(singular))),
-        }),
-    };
-
-    TString encoded;
-    {
-        TStringOutput out(encoded);
-        auto writer = CreateSkiffWriter(rowSchema, nameTable, &out, schemas);
-        // Row 0
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"opt_null", nullptr},
-            }).Get(),
-        });
-        // Row 1
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"opt_null", EValueType::Composite, "[#]"},
-            }).Get(),
-        });
-        writer->Close()
-            .Get()
-            .ThrowOnError();
-    }
-
-    TStringInput in(encoded);
-    TCheckedSkiffParser parser(CreateVariant16Schema({rowSchema}), &in);
-
-    // Row 0
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
-
-    // Row 1
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
-
-    ASSERT_EQ(parser.HasMoreData(), false);
-    parser.ValidateFinished();
-}
-
-// Rows list their columns in different orders; the stream must always
-// follow the Skiff schema order: number, eng, rus.
-TEST(TSkiffWriter, TestRearrange)
-{
-    // Builds a fresh variant8(Nothing, String32) schema on every call.
-    auto makeOptionalString = [] {
-        return CreateVariant8Schema({
-            CreateSimpleTypeSchema(EWireType::Nothing),
-            CreateSimpleTypeSchema(EWireType::String32),
-        });
-    };
-
-    auto rowSchema = CreateTupleSchema({
-        CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
-        makeOptionalString()->SetName("eng"),
-        makeOptionalString()->SetName("rus"),
-    });
-    auto nameTable = New<TNameTable>();
-    TString encoded;
-    {
-        TStringOutput out(encoded);
-        auto writer = CreateSkiffWriter(rowSchema, nameTable, &out, {New<TTableSchema>()});
-
-        // Schema order.
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"number", 1},
-                {"eng", "one"},
-                {"rus", nullptr},
-            }).Get()
-        });
-
-        // eng, number, rus.
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"eng", nullptr},
-                {"number", 2},
-                {"rus", "dva"},
-            }).Get()
-        });
-
-        // Reverse order.
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"rus", "tri"},
-                {"eng", "three"},
-                {"number", 3},
-            }).Get()
-        });
-
-        writer->Close()
-            .Get()
-            .ThrowOnError();
-    }
-
-    TStringInput in(encoded);
-    TCheckedSkiffParser parser(CreateVariant16Schema({rowSchema}), &in);
-
-    // row 0
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), 1);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
-    ASSERT_EQ(parser.ParseString32(), "one");
-    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
-
-    // row 1
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), 2);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
-    ASSERT_EQ(parser.ParseString32(), "dva");
-
-    // row 2
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), 3);
-    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
-    ASSERT_EQ(parser.ParseString32(), "three");
-    ASSERT_EQ(parser.ParseVariant8Tag(), 1);
-    ASSERT_EQ(parser.ParseString32(), "tri");
-
-    // end
-    ASSERT_EQ(parser.HasMoreData(), false);
-    parser.ValidateFinished();
-}
-
-// A row that omits the required "eng" column must make the writer fail
-// with an error that names that column.
-TEST(TSkiffWriter, TestMissingRequiredField)
-{
-    auto rowSchema = CreateTupleSchema({
-        CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
-        CreateSimpleTypeSchema(EWireType::String32)->SetName("eng"),
-    });
-    auto nameTable = New<TNameTable>();
-    TString encoded;
-    try {
-        TStringOutput out(encoded);
-        auto writer = CreateSkiffWriter(rowSchema, nameTable, &out, {New<TTableSchema>()});
-
-        writer->Write({
-            MakeRow(nameTable, {
-                {TableIndexColumnName, 0},
-                {"number", 1},
-            }).Get()
-        });
-        writer->Close()
-            .Get()
-            .ThrowOnError();
-        // Reaching this point means no error was raised.
-        ADD_FAILURE();
-    } catch (const std::exception& error) {
-        EXPECT_THAT(error.what(), testing::HasSubstr("Unexpected type of \"eng\" column"));
-    }
-}
-
-// Sparse columns: each row is expected as a sequence of (variant16 tag, value)
-// pairs for the values present, in row order, closed by the end-of-sequence
-// tag. Null and absent values are expected to produce no pair.
-TEST(TSkiffWriter, TestSparse)
-{
-    auto rowSchema = CreateTupleSchema({
-        CreateRepeatedVariant16Schema({
-            CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
-            CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
-            CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
-        })->SetName("$sparse_columns"),
-    });
-
-    auto nameTable = New<TNameTable>();
-    TString encoded;
-    TStringOutput out(encoded);
-    auto writer = CreateSkiffWriter(rowSchema, nameTable, &out, {New<TTableSchema>()});
-
-    // row 0: int64 and string32 in schema order
-    writer->Write({
-        MakeRow(nameTable, {
-            {TableIndexColumnName, 0},
-            {"int64", -1},
-            {"string32", "minus one"},
-        }).Get(),
-    });
-
-    // row 1: the same columns, swapped
-    writer->Write({
-        MakeRow(nameTable, {
-            {TableIndexColumnName, 0},
-            {"string32", "minus five"},
-            {"int64", -5},
-        }).Get(),
-    });
-
-    // row 2: uint64 only
-    writer->Write({
-        MakeRow(nameTable, {
-            {TableIndexColumnName, 0},
-            {"uint64", 42u},
-        }).Get(),
-    });
-
-    // row 3: explicit nulls next to one real value
-    writer->Write({
-        MakeRow(nameTable, {
-            {TableIndexColumnName, 0},
-            {"int64", -8},
-            {"uint64", nullptr},
-            {"string32", nullptr},
-        }).Get(),
-    });
-
-    // row 4: no sparse values at all
-    writer->Write({
-        MakeRow(nameTable, {
-            {TableIndexColumnName, 0},
-        }).Get(),
-    });
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-    TStringInput in(encoded);
-    TCheckedSkiffParser parser(CreateVariant16Schema({rowSchema}), &in);
-
-    // row 0
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), -1);
-    ASSERT_EQ(parser.ParseVariant16Tag(), 2);
-    ASSERT_EQ(parser.ParseString32(), "minus one");
-    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
-    // row 1
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant16Tag(), 2);
-    ASSERT_EQ(parser.ParseString32(), "minus five");
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), -5);
-    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
-    // row 2
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant16Tag(), 1);
-    ASSERT_EQ(parser.ParseUint64(), 42u);
-    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
-    // row 3
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseInt64(), -8);
-    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
-    // row 4
-    ASSERT_EQ(parser.ParseVariant16Tag(), 0);
-    ASSERT_EQ(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
-    // end
-    ASSERT_EQ(parser.HasMoreData(), false);
-    parser.ValidateFinished();
-}
-
-TEST(TSkiffWriter, TestMissingFields)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- });
-
- try {
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()});
-
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"unknown_column", "four"},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Column \"unknown_column\" is not described by Skiff schema"));
- }
-
- try {
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto unknownColumnId = nameTable->RegisterName("unknown_column");
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{New<TTableSchema>()});
-
- ASSERT_TRUE(unknownColumnId < nameTable->GetId("value"));
-
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"unknown_column", "four"},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
- ADD_FAILURE();
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Column \"unknown_column\" is not described by Skiff schema"));
- }
-}
-
-TEST(TSkiffWriter, TestOtherColumns)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64)
- })->SetName("int64_column"),
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
- });
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- nameTable->RegisterName("string_column");
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()});
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"string_column", "foo"},
- }).Get(),
- });
-
- // Row 1.
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"int64_column", 42},
- }).Get(),
- });
- // Row 2.
- writer->Write({
- MakeRow(nameTable, {
- {TableIndexColumnName, 0},
- {"other_string_column", "bar"},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- auto parseYson = [] (TCheckedSkiffParser* parser) {
- auto yson = TString{parser->ParseYson32()};
- return ConvertToYsonTextStringStable(ConvertToNode(TYsonString(yson)));
- };
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser), "{\"string_column\"=\"foo\";}");
-
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 42);
- ASSERT_EQ(parseYson(&checkedSkiffParser), "{}");
-
- // row 2
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- ASSERT_EQ(parseYson(&checkedSkiffParser), "{\"other_string_column\"=\"bar\";}");
-
- // end
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-TEST(TSkiffWriter, TestKeySwitch)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
- });
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1);
-
- writer->Write({
- // Row 0.
- MakeRow(nameTable, {
- {"value", "one"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- // Row 1.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "one"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- // Row 2.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "two"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- TString buf;
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
-
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
-
- // row 2
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "two");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
-
- // end
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-TEST(TSkiffWriter, TestEndOfStream)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- });
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1, true);
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "zero"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- // Row 1.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "one"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- TString buf;
-
- // Row 0.
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "zero");
-
- // Row 1.
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
-
- // End of stream.
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0xffff);
-
- // The End.
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-TEST(TSkiffWriter, TestRowRangeIndex)
-{
- const auto rowAndRangeIndex = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName("$range_index"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName("$row_index"),
- });
-
- struct TRow {
- int TableIndex;
- std::optional<int> RangeIndex;
- std::optional<int> RowIndex;
- };
- auto generateUnversionedRow = [] (const TRow& row, const TNameTablePtr& nameTable) {
- std::vector<TNamedValue> values = {
- {TableIndexColumnName, row.TableIndex},
- };
- if (row.RangeIndex) {
- values.emplace_back(RangeIndexColumnName, *row.RangeIndex);
- }
- if (row.RowIndex) {
- values.push_back({RowIndexColumnName, *row.RowIndex});
- }
- return MakeRow(nameTable, values);
- };
-
- auto skiffWrite = [generateUnversionedRow] (const std::vector<TRow>& rows, const std::shared_ptr<TSkiffSchema>& skiffSchema) {
- std::vector<TTableSchemaPtr> tableSchemas;
- {
- THashSet<int> tableIndices;
- for (const auto& row : rows) {
- tableIndices.insert(row.TableIndex);
- }
- tableSchemas.assign(tableIndices.size(), New<TTableSchema>());
- }
-
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(
- skiffSchema,
- nameTable,
- &resultStream,
- tableSchemas);
-
- for (const auto& row : rows) {
- writer->Write({generateUnversionedRow(row, nameTable)});
- }
- writer->Close()
- .Get()
- .ThrowOnError();
-
- return HexEncode(resultStream.Str());
- };
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 0, 2},
- }, rowAndRangeIndex).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "00" "00"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 0, 3},
- }, rowAndRangeIndex).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "00" "01""03000000""00000000"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 1, 2},
- {0, 1, 3},
- }, rowAndRangeIndex).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "01""01000000""00000000" "01""02000000""00000000"
- "0000" "00" "00"
- );
-
- EXPECT_THROW_WITH_SUBSTRING(skiffWrite({{0, 0, {}}}, rowAndRangeIndex), "index requested but reader did not return it");
- EXPECT_THROW_WITH_SUBSTRING(skiffWrite({{0, {}, 0}}, rowAndRangeIndex), "index requested but reader did not return it");
-
- const auto rowAndRangeIndexAllowMissing = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Nothing),
- })->SetName("$range_index"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Nothing),
- })->SetName("$row_index"),
- });
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 0, 2},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "00" "00"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 0, 3},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "00" "01""03000000""00000000"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, 0},
- {0, 0, 1},
- {0, 1, 2},
- {0, 1, 3},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "01""00000000""00000000" "01""00000000""00000000"
- "0000" "00" "00"
- "0000" "01""01000000""00000000" "01""02000000""00000000"
- "0000" "00" "00"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, {}, {}},
- {0, {}, {}},
- {0, {}, {}},
- {0, {}, {}},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "02" "02"
- "0000" "02" "02"
- "0000" "02" "02"
- "0000" "02" "02"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, {}, 0},
- {0, {}, 1},
- {0, {}, 3},
- {0, {}, 4},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "02" "01""00000000""00000000"
- "0000" "02" "00"
- "0000" "02" "01""03000000""00000000"
- "0000" "02" "00"
- );
-
- EXPECT_STREQ(
- skiffWrite({
- {0, 0, {}},
- {0, 0, {}},
- {0, 1, {}},
- {0, 1, {}},
- }, rowAndRangeIndexAllowMissing).data(),
-
- "0000" "01""00000000""00000000" "02"
- "0000" "00" "02"
- "0000" "01""01000000""00000000" "02"
- "0000" "00" "02"
- );
-}
-
-TEST(TSkiffWriter, TestRowIndexOnlyOrRangeIndexOnly)
-{
- TString columnNameList[] = {
- RowIndexColumnName,
- RangeIndexColumnName,
- };
-
- for (const auto& columnName : columnNameList) {
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName(columnName),
- });
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1);
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {columnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
-
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-}
-
-TEST(TSkiffWriter, TestComplexType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateRepeatedVariant8Schema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("x"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("y"),
- })
- })->SetName("points")
- })->SetName("value"),
- });
-
- {
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("value", StructLogicalType({
- {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {
- "points",
- ListLogicalType(
- StructLogicalType({
- {"x", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"y", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- })
- )
- }
- })),
- });
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{tableSchema});
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {"value", EValueType::Composite, "[foo;[[0; 1];[2;3]]]"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "foo");
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 2);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 3);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), EndOfSequenceTag<ui8>());
-
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-}
-
-TEST(TSkiffWriter, TestEmptyComplexType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- })
- })->SetName("value"),
- });
-
- {
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("value", OptionalLogicalType(
- StructLogicalType({
- {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))
- ),
- });
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{tableSchema});
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {"value", nullptr},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 0);
-
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-}
-
-TEST(TSkiffWriter, TestSparseComplexType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateRepeatedVariant16Schema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- })->SetName("value"),
- })->SetName("$sparse_columns"),
- });
-
- {
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("value", OptionalLogicalType(
- StructLogicalType({
- {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))
- ),
- });
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{tableSchema});
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {"value", EValueType::Composite, "[foo;bar;]"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "foo");
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "bar");
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-}
-
-TEST(TSkiffWriter, TestSparseComplexTypeWithExtraOptional)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateRepeatedVariant16Schema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- })
- })->SetName("value"),
- })->SetName("$sparse_columns"),
- });
-
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("value", OptionalLogicalType(
- StructLogicalType({
- {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", SimpleLogicalType(ESimpleLogicalValueType::String)},
- }))
- ),
- });
-
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{tableSchema});
-
- // Row 0.
- writer->Write({
- MakeRow(nameTable, {
- {"value", EValueType::Composite, "[foo;bar;]"},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "foo");
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "bar");
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), EndOfSequenceTag<ui16>());
-
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-}
-
-TEST(TSkiffWriter, TestBadWireTypeForSimpleColumn)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Yson32),
- })
- })->SetName("opt_yson32"),
- });
- auto nameTable = New<TNameTable>();
- TStringStream resultStream;
- EXPECT_THROW_WITH_SUBSTRING(
- CreateSkiffWriter(skiffSchema, nameTable, &resultStream, std::vector{New<TTableSchema>()}),
- "cannot be represented with Skiff schema"
- );
-}
-
-TEST(TSkiffWriter, TestMissingComplexColumn)
-{
- auto optionalSkiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Int64)}),
- })->SetName("opt_list"),
- });
- auto requiredSkiffSchema = CreateTupleSchema({
- CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Int64)})->SetName("opt_list"),
- });
-
- { // Non optional Skiff schema
- auto nameTable = New<TNameTable>();
- EXPECT_THROW_WITH_SUBSTRING(
- CreateSkiffWriter(requiredSkiffSchema, nameTable, &Cnull, std::vector{New<TTableSchema>()}),
- "cannot be represented with Skiff schema"
- );
- }
-
- {
- auto nameTable = New<TNameTable>();
- TStringStream resultStream;
- auto writer = CreateSkiffWriter(optionalSkiffSchema, nameTable, &resultStream, std::vector{New<TTableSchema>()});
- writer->Write({
- MakeRow(nameTable, { }).Get(),
- MakeRow(nameTable, {
- {"opt_list", nullptr},
- }).Get(),
- MakeRow(nameTable, { }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- EXPECT_EQ(HexEncode(resultStream.Str()), "0000" "00" "0000" "00" "0000" "00");
- }
-}
-
-TEST(TSkiffWriter, TestSkippedFields)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("number"),
- CreateSimpleTypeSchema(EWireType::Nothing)->SetName("string"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName(RangeIndexColumnName),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName(RowIndexColumnName),
- CreateSimpleTypeSchema(EWireType::Double)->SetName("double"),
- });
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("number", EValueType::Int64),
- TColumnSchema("string", EValueType::String),
- TColumnSchema("double", EValueType::Double),
- });
-
- auto nameTable = New<TNameTable>();
- TString result;
- {
- TStringOutput resultStream(result);
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {tableSchema});
-
- writer->Write({
- MakeRow(nameTable, {
- {"number", 1},
- {"string", "hello"},
- {RangeIndexColumnName, 0},
- {RowIndexColumnName, 0},
- {"double", 1.5},
- }).Get()
- });
- writer->Write({
- MakeRow(nameTable, {
- {"number", 1},
- {RangeIndexColumnName, 5},
- {RowIndexColumnName, 1},
- {"double", 2.5},
- }).Get()
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(result);
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 1.5);
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 5);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseDouble(), 2.5);
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-
-}
-
-TEST(TSkiffWriter, TestSkippedFieldsOutOfRange)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Nothing)->SetName("string"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName(RangeIndexColumnName),
- });
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("string", EValueType::String),
- });
-
- auto nameTable = New<TNameTable>();
- TString result;
- {
- TStringOutput resultStream(result);
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {tableSchema});
-
- writer->Write({
- MakeRow(nameTable, {
- {"string", "hello"},
- {RangeIndexColumnName, 0},
- }).Get()
- });
- writer->Write({
- MakeRow(nameTable, {
- {RangeIndexColumnName, 5},
- }).Get()
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(result);
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseVariant8Tag(), 1);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 5);
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
- }
-
-}
-
-TEST(TSkiffWriter, TestSkippedFieldsAndKeySwitch)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("value"),
- CreateSimpleTypeSchema(EWireType::Nothing)->SetName("skipped"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("value1"),
- });
- TStringStream resultStream;
- auto nameTable = New<TNameTable>();
- auto writer = CreateSkiffWriter(skiffSchema, nameTable, &resultStream, {New<TTableSchema>()}, 1);
-
- writer->Write({
- // Row 0.
- MakeRow(nameTable, {
- {"value", "one"},
- {"value1", 0},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- // Row 1.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "one"},
- {"value1", 1},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- // Row 2.
- writer->Write({
- MakeRow(nameTable, {
- {"value", "two"},
- {"value1", 2},
- {TableIndexColumnName, 0},
- }).Get(),
- });
- writer->Close()
- .Get()
- .ThrowOnError();
-
- TStringInput resultInput(resultStream.Str());
- TCheckedSkiffParser checkedSkiffParser(CreateVariant16Schema({skiffSchema}), &resultInput);
-
- TString buf;
-
- // row 0
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 0);
-
- // row 1
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "one");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), false);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 1);
-
- // row 2
- ASSERT_EQ(checkedSkiffParser.ParseVariant16Tag(), 0);
- ASSERT_EQ(checkedSkiffParser.ParseString32(), "two");
- ASSERT_EQ(checkedSkiffParser.ParseBoolean(), true);
- ASSERT_EQ(checkedSkiffParser.ParseInt64(), 2);
-
- // end
- ASSERT_EQ(checkedSkiffParser.HasMoreData(), false);
- checkedSkiffParser.ValidateFinished();
-
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSkiffParser, Simple)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
- CreateSimpleTypeSchema(EWireType::Double)->SetName("double"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("boolean"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
- CreateSimpleTypeSchema(EWireType::Nothing)->SetName("null"),
-
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- })->SetName("opt_int64"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Uint64),
- })->SetName("opt_uint64"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Double),
- })->SetName("opt_double"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Boolean),
- })->SetName("opt_boolean"),
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::String32),
- })->SetName("opt_string32"),
- });
-
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteInt64(-1);
- checkedSkiffWriter.WriteUint64(2);
- checkedSkiffWriter.WriteDouble(3.0);
- checkedSkiffWriter.WriteBoolean(true);
- checkedSkiffWriter.WriteString32("foo");
-
- checkedSkiffWriter.WriteVariant8Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(0);
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
-
- ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "int64")), -1);
- ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "uint64")), 2u);
- ASSERT_EQ(GetDouble(collectedRows.GetRowValue(0, "double")), 3.0);
- ASSERT_EQ(GetBoolean(collectedRows.GetRowValue(0, "boolean")), true);
- ASSERT_EQ(GetString(collectedRows.GetRowValue(0, "string32")), "foo");
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "null")), true);
-
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_int64")), true);
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_uint64")), true);
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_double")), true);
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_boolean")), true);
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(0, "opt_string32")), true);
-}
-
-TEST(TSkiffParser, TestOptionalNull)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Nothing),
- })->SetName("opt_null"),
- });
- auto nameTable = New<TNameTable>();
-
- {
- TCollectingValueConsumer collectedRows;
- EXPECT_THROW_WITH_SUBSTRING(
- CreateParserForSkiff(skiffSchema, &collectedRows),
- "cannot be represented with Skiff schema");
- }
-
- auto tableSchema = New<TTableSchema>(std::vector{
- TColumnSchema("opt_null", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Null))),
- });
-
- TCollectingValueConsumer collectedRows(tableSchema);
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(0);
-
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(1);
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
-
- ASSERT_EQ(collectedRows.GetRowValue(0, "opt_null").Type, EValueType::Null);
-}
-
-TEST(TSkiffParser, TestSparse)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateRepeatedVariant16Schema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("int64"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("uint64"),
- CreateSimpleTypeSchema(EWireType::String32)->SetName("string32"),
- })->SetName("$sparse_columns"),
- });
-
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // row 1
- checkedSkiffWriter.WriteVariant16Tag(0);
- // sparse fields begin
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteInt64(-42);
- checkedSkiffWriter.WriteVariant16Tag(1);
- checkedSkiffWriter.WriteUint64(54);
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- // row 2
- checkedSkiffWriter.WriteVariant16Tag(0);
- // sparse fields begin
- checkedSkiffWriter.WriteVariant16Tag(2);
- checkedSkiffWriter.WriteString32("foo");
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
-
- ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "int64")), -42);
- ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "uint64")), 54u);
- ASSERT_FALSE(collectedRows.FindRowValue(0, "string32"));
-
- ASSERT_FALSE(collectedRows.FindRowValue(1, "int64"));
- ASSERT_FALSE(collectedRows.FindRowValue(1, "uint64"));
- ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "string32")), "foo");
-}
-
-TEST(TSkiffParser, TestYsonWireType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
- });
-
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // Row 0.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("-42");
-
- // Row 1.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("42u");
-
- // Row 2.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("\"foobar\"");
-
- // Row 3.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("%true");
-
- // Row 4.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("{foo=bar}");
-
- // Row 5.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32("#");
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 6);
- ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "yson")), -42);
- ASSERT_EQ(GetUint64(collectedRows.GetRowValue(1, "yson")), 42u);
- ASSERT_EQ(GetString(collectedRows.GetRowValue(2, "yson")), "foobar");
- ASSERT_EQ(GetBoolean(collectedRows.GetRowValue(3, "yson")), true);
- ASSERT_EQ(GetAny(collectedRows.GetRowValue(4, "yson"))->AsMap()->GetChildOrThrow("foo")->AsString()->GetValue(), "bar");
- ASSERT_EQ(IsNull(collectedRows.GetRowValue(5, "yson")), true);
-}
-
-TEST(TSkiffParser, TestBadYsonWireType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
- });
-
- auto parseYsonUsingSkiff = [&] (TStringBuf ysonValue) {
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
- TStringStream dataStream;
- ASSERT_NO_THROW({
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteYson32(ysonValue);
-
- checkedSkiffWriter.Finish();
- });
-
- parser->Read(dataStream.Str());
- parser->Finish();
- };
-
- try {
- parseYsonUsingSkiff("[42");
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Premature end of stream"));
- }
-
- try {
- parseYsonUsingSkiff("<foo=bar>42");
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Table values cannot have top-level attributes"));
- }
-}
-
-TEST(TSkiffParser, TestSpecialColumns)
-{
- std::shared_ptr<TSkiffSchema> skiffSchemaList[] = {
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"),
- }),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$row_switch"),
- }),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("yson"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$range_switch"),
- }),
- };
-
- for (const auto& skiffSchema : skiffSchemaList) {
- try {
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
- } catch (std::exception& e) {
- EXPECT_THAT(e.what(), testing::HasSubstr("Skiff parser does not support \"$key_switch\""));
- }
- }
-}
-
-TEST(TSkiffParser, TestOtherColumns)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("$other_columns"),
- });
-
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // Row 0.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteString32("row_0");
- checkedSkiffWriter.WriteYson32("{foo=-42;}");
-
- // Row 1.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteString32("row_1");
- checkedSkiffWriter.WriteYson32("{bar=qux;baz={boolean=%false;};}");
-
- // Row 2.
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
- ASSERT_EQ(GetString(collectedRows.GetRowValue(0, "name")), "row_0");
- ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "foo")), -42);
-
- ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "name")), "row_1");
- ASSERT_EQ(GetString(collectedRows.GetRowValue(1, "bar")), "qux");
- ASSERT_EQ(ConvertToYsonTextStringStable(GetAny(collectedRows.GetRowValue(1, "baz"))), "{\"boolean\"=%false;}");
-}
-
-TEST(TSkiffParser, TestComplexColumn)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
- })->SetName("column")
- });
-
- TCollectingValueConsumer collectedRows(
- New<TTableSchema>(std::vector{
- TColumnSchema("column", NTableClient::StructLogicalType({
- {"key", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::Int64)}
- }))
- }));
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // Row 0.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteString32("row_0");
- checkedSkiffWriter.WriteInt64(42);
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
- ASSERT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "column"))), "[\"row_0\";42;]");
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSkiffParser, TestEmptyInput)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("column"),
- });
-
- TCollectingValueConsumer collectedRows;
-
- {
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
- parser->Finish();
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
- }
- {
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
- parser->Read("");
- parser->Finish();
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
- }
- {
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
- parser->Read("");
- parser->Read("");
- parser->Finish();
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 0);
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TSkiffParser, ColumnIds)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("field_a"),
- CreateSimpleTypeSchema(EWireType::Uint64)->SetName("field_b")
- });
-
- TCollectingValueConsumer collectedRows;
- collectedRows.GetNameTable()->GetIdOrRegisterName("field_b");
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteInt64(-1);
- checkedSkiffWriter.WriteUint64(2);
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 1);
-
- ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "field_a")), -1);
- ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "field_b")), 2u);
-}
-
-TEST(TSkiffParser, TestSparseComplexType)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateRepeatedVariant16Schema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("name"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
- })->SetName("value"),
- })->SetName("$sparse_columns"),
- });
-
- TCollectingValueConsumer collectedRows(
- New<TTableSchema>(std::vector{
- TColumnSchema("value", OptionalLogicalType(
- StructLogicalType({
- {"name", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", SimpleLogicalType(ESimpleLogicalValueType::Int64)}
- })
- ))
- }));
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // Row 0.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteString32("row_0");
- checkedSkiffWriter.WriteInt64(10);
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- // Row 1.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
- EXPECT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "value"))), "[\"row_0\";10;]");
- EXPECT_FALSE(collectedRows.FindRowValue(1, "value"));
-}
-
-TEST(TSkiffParser, TestSparseComplexTypeWithExtraOptional)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateRepeatedVariant16Schema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("value"),
- })
- })->SetName("column"),
- })->SetName("$sparse_columns"),
- });
-
- TCollectingValueConsumer collectedRows(
- New<TTableSchema>(std::vector{
- TColumnSchema("column", OptionalLogicalType(
- StructLogicalType({
- {"key", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"value", NTableClient::SimpleLogicalType(ESimpleLogicalValueType::Int64)}
- })
- ))
- }));
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- TStringStream dataStream;
- TCheckedSkiffWriter checkedSkiffWriter(CreateVariant16Schema({skiffSchema}), &dataStream);
-
- // Row 0.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant8Tag(1);
- checkedSkiffWriter.WriteString32("row_0");
- checkedSkiffWriter.WriteInt64(42);
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- // Row 1.
- checkedSkiffWriter.WriteVariant16Tag(0);
- checkedSkiffWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>());
-
- checkedSkiffWriter.Finish();
-
- parser->Read(dataStream.Str());
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
- ASSERT_EQ(ConvertToYsonTextStringStable(GetComposite(collectedRows.GetRowValue(0, "column"))), "[\"row_0\";42;]");
- ASSERT_FALSE(collectedRows.FindRowValue(1, "column"));
-}
-
-
-TEST(TSkiffParser, TestBadWireTypeForSimpleColumn)
-{
- auto skiffSchema = CreateTupleSchema({
- CreateVariant8Schema({
- CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Yson32),
- })
- })->SetName("opt_yson32"),
- });
-
- TCollectingValueConsumer collectedRows;
- EXPECT_THROW_WITH_SUBSTRING(
- CreateParserForSkiff(skiffSchema, &collectedRows),
- "cannot be represented with Skiff schema"
- );
-}
-
-TEST(TSkiffParser, TestEmptyColumns)
-{
- auto skiffSchema = CreateTupleSchema({});
- TCollectingValueConsumer collectedRows;
- auto parser = CreateParserForSkiff(skiffSchema, &collectedRows);
-
- parser->Read(TStringBuf("\x00\x00\x00\x00"sv));
- parser->Finish();
-
- ASSERT_EQ(static_cast<int>(collectedRows.Size()), 2);
-}
-
-TEST(TSkiffFormat, TestTimestamp)
-{
- using namespace NLogicalTypeShortcuts;
- CHECK_BIDIRECTIONAL_CONVERSION(Timestamp(), CreateSimpleTypeSchema(EWireType::Uint64), 42ull, "2A000000" "00000000");
- CHECK_BIDIRECTIONAL_CONVERSION(Interval(), CreateSimpleTypeSchema(EWireType::Int64), 42, "2A000000" "00000000");
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT
diff --git a/yt/yt/client/unittests/skiff_yson_converter_ut.cpp b/yt/yt/client/unittests/skiff_yson_converter_ut.cpp
deleted file mode 100644
index 18ecfac352..0000000000
--- a/yt/yt/client/unittests/skiff_yson_converter_ut.cpp
+++ /dev/null
@@ -1,728 +0,0 @@
-#include "logical_type_shortcuts.h"
-
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/table_client/logical_type.h>
-#include <yt/yt/client/formats/skiff_yson_converter.h>
-
-#include <yt/yt/core/yson/parser.h>
-#include <yt/yt/core/yson/pull_parser.h>
-#include <yt/yt/core/yson/token_writer.h>
-#include <yt/yt/core/yson/writer.h>
-
-#include <library/cpp/skiff/skiff.h>
-#include <library/cpp/skiff/skiff_schema.h>
-
-#include <util/string/hex.h>
-
-#include <util/stream/mem.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NTableClient;
-using namespace NSkiff;
-using namespace NYson;
-using namespace NTableClient::NLogicalTypeShortcuts;
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::shared_ptr<TSkiffSchema> SkiffOptional(std::shared_ptr<TSkiffSchema> skiffSchema)
-{
- return CreateVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- std::move(skiffSchema)
- });
-}
-
-TString ConvertYsonHex(
- const TLogicalTypePtr& logicalType,
- const std::shared_ptr<TSkiffSchema>& skiffSchema,
- TStringBuf ysonString,
- const TYsonToSkiffConverterConfig& config = {})
-{
- auto converter = CreateYsonToSkiffConverter(
- TComplexTypeFieldDescriptor("test-field", logicalType),
- skiffSchema,
- config);
-
- // Yson parsers have a bug when they can't parse some values that end unexpectedly.
- TString spacedYsonInput = TString{ysonString} + " ";
-
- TStringStream out;
- {
- TCheckedInDebugSkiffWriter writer(skiffSchema, &out);
-
- TMemoryInput in(spacedYsonInput);
- TYsonPullParser pullParser(&in, EYsonType::Node);
- TYsonPullParserCursor cursor(&pullParser);
-
- converter(&cursor, &writer);
-
- EXPECT_EQ(cursor.GetCurrent().GetType(), EYsonItemType::EndOfStream);
- writer.Finish();
- }
-
- auto result = HexEncode(out.Str());
- result.to_lower();
- return result;
-}
-
-TString ConvertHexToTextYson(
- const TLogicalTypePtr& logicalType,
- const std::shared_ptr<TSkiffSchema>& skiffSchema,
- TStringBuf hexString,
- const TSkiffToYsonConverterConfig& config = {})
-{
- auto converter = CreateSkiffToYsonConverter(TComplexTypeFieldDescriptor("test-field", logicalType), skiffSchema, config);
-
-
- TStringStream binaryOut;
- {
- TString binaryString = HexDecode(hexString);
- TMemoryInput in(binaryString);
- TCheckedInDebugSkiffParser parser(skiffSchema, &in);
-
- auto writer = TCheckedInDebugYsonTokenWriter(&binaryOut);
- converter(&parser, &writer);
- EXPECT_EQ(parser.GetReadBytesCount(), binaryString.size());
- }
- binaryOut.Finish();
-
- TStringStream out;
- {
- auto writer = TYsonWriter(&out, EYsonFormat::Text);
- ParseYsonStringBuffer(binaryOut.Str(), EYsonType::Node, &writer);
- }
- out.Finish();
-
- return out.Str();
-}
-
-
-#define CHECK_BIDIRECTIONAL_CONVERSION(logicalType, skiffSchema, ysonString, skiffString, ...) \
- do { \
- std::tuple<TYsonToSkiffConverterConfig,TSkiffToYsonConverterConfig> cfg = {__VA_ARGS__}; \
- auto actualSkiffString = ConvertYsonHex(logicalType, skiffSchema, ysonString, std::get<0>(cfg)); \
- EXPECT_EQ(actualSkiffString, skiffString) << "Yson -> Skiff conversion error"; \
- auto actualYsonString = ConvertHexToTextYson(logicalType, skiffSchema, skiffString, std::get<1>(cfg)); \
- EXPECT_EQ(actualYsonString, ysonString) << "Skiff -> Yson conversion error"; \
- } while (0)
-
-
-TEST(TYsonSkiffConverterTest, TestSimpleTypes)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- Int8(),
- CreateSimpleTypeSchema(EWireType::Int64),
- "-42",
- "d6ffffff" "ffffffff");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Uint64(),
- CreateSimpleTypeSchema(EWireType::Uint64),
- "42u",
- "2a000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Uint64(),
- CreateSimpleTypeSchema(EWireType::Uint64),
- "8u",
- "08000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Bool(),
- CreateSimpleTypeSchema(EWireType::Boolean),
- "%true",
- "01");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Double(),
- CreateSimpleTypeSchema(EWireType::Double),
- "0.",
- "00000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Float(),
- CreateSimpleTypeSchema(EWireType::Double),
- "0.",
- "00000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- String(),
- CreateSimpleTypeSchema(EWireType::String32),
- "\"foo\"",
- "03000000" "666f6f");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Null(),
- CreateSimpleTypeSchema(EWireType::Nothing),
- "#",
- "");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Uuid(),
- CreateSimpleTypeSchema(EWireType::Uint128),
- "\"\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF\"",
- "fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Uuid(),
- CreateSimpleTypeSchema(EWireType::String32),
- "\"\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF\"",
- "10000000f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff");
-}
-
-TEST(TYsonSkiffConverterTest, TestYson32)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- Yson(),
- CreateSimpleTypeSchema(EWireType::Yson32),
- "-42",
- "02000000" "0253");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Yson(),
- CreateSimpleTypeSchema(EWireType::Yson32),
- "#",
- "01000000" "23");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Yson(),
- CreateSimpleTypeSchema(EWireType::Yson32),
- "[1;2;[3;];]",
- "0e000000" "5b02023b02043b5b02063b5d3b5d");
-}
-
-TEST(TYsonSkiffConverterTest, TestOptionalTypes)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Int64()),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- "-42",
- "01" "d6ffffff" "ffffffff");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Int64()),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- "#",
- "00");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(Bool())),
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
- "[%true;]",
- "01" "01" "01");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(Bool())),
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
- "[#;]",
- "01" "00");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(Bool())),
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean))),
- "#",
- "00");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(List(Bool())),
- SkiffOptional(CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)})),
- "#",
- "00");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(List(Bool()))),
- SkiffOptional(
- SkiffOptional(
- CreateRepeatedVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Boolean)
- })
- )
- ),
- "[[%true;%false;%true;];]",
- "01" "01" "0001" "0000" "0001" "ff");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(List(Bool()))),
- SkiffOptional(
- SkiffOptional(
- CreateRepeatedVariant8Schema({
- CreateSimpleTypeSchema(EWireType::Boolean)
- })
- )
- ),
- "[#;]",
- "0100");
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertYsonHex(
- Optional(Optional(Bool())),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
- " [ %true ] "),
- "Optional nesting mismatch");
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertHexToTextYson(
- Optional(Bool()),
- CreateSimpleTypeSchema(EWireType::Boolean),
- "00"),
- "Optional nesting mismatch");
-
- TYsonToSkiffConverterConfig ysonToSkiffConfig;
- ysonToSkiffConfig.AllowOmitTopLevelOptional = true;
-
- TSkiffToYsonConverterConfig skiffToYsonConfig;
- skiffToYsonConfig.AllowOmitTopLevelOptional = true;
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(Bool())),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
- "[%true;]",
- "01" "01",
- ysonToSkiffConfig,
- skiffToYsonConfig);
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Optional(Optional(Bool())),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
- "[#;]",
- "00",
- ysonToSkiffConfig,
- skiffToYsonConfig);
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertYsonHex(
- Optional(Optional(Bool())),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Boolean)),
- " # ",
- ysonToSkiffConfig),
- "value expected to be nonempty");
-}
-
-TEST(TYsonSkiffConverterTest, TestListTypes)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- List(Bool()),
- CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)}),
- "[]",
- "ff");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- List(Bool()),
- CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)}),
- "[%true;%true;%true;]",
- "00" "01" "00" "01" "00" "01" "ff");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- List(List(Bool())),
- CreateRepeatedVariant8Schema({CreateRepeatedVariant8Schema({CreateSimpleTypeSchema(EWireType::Boolean)})}),
- "[[];[%true;];[%true;%true;];]",
- "00" "ff" "00" "0001ff" "00" "00010001ff" "ff");
-}
-
-TEST(TYsonSkiffConverterTest, TestStruct)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- Struct(
- "key", String(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- }),
- "[\"true\";%true;]",
- "04000000" "74727565" "01");
-}
-
-TEST(TYsonSkiffConverterTest, TestSkippedFields)
-{
- TString skiffString;
- skiffString = ConvertYsonHex(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "04000000" "74727565" "01"sv);
-
- skiffString = ConvertYsonHex(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "01000000" "00000000"sv);
-
- try {
- ConvertHexToTextYson(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
- }),
- "01000000" "00000000");
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::ContainsRegex("Non optional struct field .* is missing"));
- }
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Struct(
- "key", Optional(String()),
- "subkey", Int64(),
- "value", Optional(Bool())
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64)->SetName("subkey"),
- }),
- "[#;15;#;]",
- "0f000000" "00000000");
-}
-
-TEST(TYsonSkiffConverterTest, TestUnknownSkiffFields)
-{
- TString skiffString;
- skiffString = ConvertYsonHex(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::String32))->SetName("key2"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "04000000" "74727565" "00" "01"sv);
-
- skiffString = ConvertYsonHex(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Yson32))->SetName("value2"),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "04000000" "74727565" "01" "00"sv);
-
-
- try {
- ConvertYsonHex(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- CreateSimpleTypeSchema(EWireType::Yson32)->SetName("value2"),
- }),
- " [ true ; 1; %true ] ");
- GTEST_FAIL() << "exception expected";
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::ContainsRegex("Non optional Skiff field .* is missing corresponding logical struct field"));
- }
-
- try {
- ConvertHexToTextYson(
- Struct(
- "key", String(),
- "subkey", Int64(),
- "value", Bool()
- ),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32)->SetName("key"),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::String32))->SetName("key2"),
- CreateSimpleTypeSchema(EWireType::Boolean)->SetName("value"),
- }),
- "04000000" "74727565" "00" "01"sv);
- GTEST_FAIL() << "expected_exception";
- } catch (const std::exception& e) {
- EXPECT_THAT(e.what(), testing::ContainsRegex("is not found in logical type"));
- }
-}
-
-TEST(TYsonSkiffConverterTest, TestTuple)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- Tuple(String(), Bool()),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32),
- CreateSimpleTypeSchema(EWireType::Boolean),
- }),
- "[\"true\";%true;]",
- "04000000" "74727565" "01");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- Tuple(Int64(), Optional(Int64())),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Int64),
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- }),
- "[2;42;]",
- "02000000" "00000000" "01" "2a000000" "00000000");
-}
-
-TEST(TYsonSkiffConverterTest, TestTupleSkippedFields)
-{
- TString skiffString;
- skiffString = ConvertYsonHex(
- Tuple(String(), Int64(), Bool()),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32),
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Boolean),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "04000000" "74727565" "01"sv);
-
- skiffString = ConvertYsonHex(
- Tuple(String(), Int64(), Bool()),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Nothing),
- }),
- " [ true ; 1; %true ] ");
- EXPECT_EQ(skiffString, "01000000" "00000000"sv);
-
- skiffString = ConvertYsonHex(
- Tuple(Optional(String()), Int64(), Optional(Bool())),
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::Nothing),
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Nothing)
- }),
- "[#;15;#;]"
- );
- EXPECT_EQ(skiffString, "0f000000" "00000000"sv);
-}
-
-TEST(TYsonSkiffConverterTest, TestDict)
-{
- const auto logicalType = Dict(String(), Int64());
- const auto skiffSchema = CreateRepeatedVariant8Schema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32),
- CreateSimpleTypeSchema(EWireType::Int64)
- })
- });
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- skiffSchema,
- "[[\"one\";1;];[\"two\";2;];]",
- "00" "03000000" "6f6e65" "01000000" "00000000"
- "00" "03000000" "74776f" "02000000" "00000000"
- "ff"
- );
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertHexToTextYson(logicalType, skiffSchema, "01" "01000000" "6f" "01000000" "00000000" "ff"),
- "Unexpected repeated_variant8 tag"
- );
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertHexToTextYson(logicalType, skiffSchema, "00" "01000000" "6f" "01000000" "00000000"),
- "Premature end of stream"
- );
-}
-
-TEST(TYsonSkiffConverterTest, TestTagged)
-{
- const auto logicalType = Tagged(
- "tag",
- Dict(Tagged("tag", String()), Int64()));
- const auto skiffSchema = CreateRepeatedVariant8Schema({
- CreateTupleSchema({
- CreateSimpleTypeSchema(EWireType::String32),
- CreateSimpleTypeSchema(EWireType::Int64)
- })
- });
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- skiffSchema,
- "[[\"one\";1;];[\"two\";2;];]",
- "00" "03000000" "6f6e65" "01000000" "00000000"
- "00" "03000000" "74776f" "02000000" "00000000"
- "ff"
- );
-}
-
-TEST(TYsonSkiffConverterTest, TestOptionalVariantSimilarity)
-{
- auto logicalType = Optional(
- VariantTuple(Null(), Int64())
- );
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
- "[1;42;]",
- "01" "01" "2a000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
- "[0;#;]",
- "01" "00");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- SkiffOptional(SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64))),
- "#",
- "00");
-
- TYsonToSkiffConverterConfig ysonToSkiffConfig;
- ysonToSkiffConfig.AllowOmitTopLevelOptional = true;
-
- TSkiffToYsonConverterConfig skiffToYsonConfig;
- skiffToYsonConfig.AllowOmitTopLevelOptional = true;
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- "[1;42;]",
- "01" "2a000000" "00000000",
- ysonToSkiffConfig,
- skiffToYsonConfig);
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- logicalType,
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- "[0;#;]",
- "00",
- ysonToSkiffConfig,
- skiffToYsonConfig);
-
- EXPECT_THROW_WITH_SUBSTRING(
- ConvertYsonHex(
- logicalType,
- SkiffOptional(CreateSimpleTypeSchema(EWireType::Int64)),
- "#",
- ysonToSkiffConfig),
- "value expected to be nonempty"
- );
-}
-
-class TYsonSkiffConverterTestVariant
- : public ::testing::TestWithParam<std::tuple<ELogicalMetatype, EWireType>>
-{
-public:
- TLogicalTypePtr VariantLogicalType(const std::vector<TLogicalTypePtr>& elements)
- {
- auto [metatype, wireType] = GetParam();
- if (metatype == ELogicalMetatype::VariantTuple) {
- return VariantTupleLogicalType(elements);
- } else {
- std::vector<TStructField> fields;
- for (size_t i = 0; i < elements.size(); ++i) {
- fields.push_back({Format("field%v", i), elements[i]});
- }
- return VariantStructLogicalType(fields);
- }
- }
-
- std::shared_ptr<TSkiffSchema> VariantSkiffSchema(std::vector<std::shared_ptr<TSkiffSchema>> elements)
- {
- for (size_t i = 0; i < elements.size(); ++i) {
- elements[i]->SetName(Format("field%v", i));
- }
- auto [metatype, wireType] = GetParam();
- if (wireType == EWireType::Variant8) {
- return CreateVariant8Schema(std::move(elements));
- } else if (wireType == EWireType::Variant16) {
- return CreateVariant16Schema(std::move(elements));
- }
- Y_UNREACHABLE();
- }
-
- TString VariantTagInfix() const
- {
- auto [metatype, wireType] = GetParam();
- if (wireType == EWireType::Variant16) {
- return "00";
- }
- return {};
- }
-};
-
-TEST_P(TYsonSkiffConverterTestVariant, TestVariant)
-{
- CHECK_BIDIRECTIONAL_CONVERSION(
- VariantLogicalType({
- Int64(),
- Bool()
- }),
- VariantSkiffSchema({
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Boolean),
- }),
- "[0;42;]",
- "00" + VariantTagInfix() + "2a000000" "00000000");
-
- CHECK_BIDIRECTIONAL_CONVERSION(
- VariantLogicalType({
- Int64(),
- Bool()
- }),
- VariantSkiffSchema({
- CreateSimpleTypeSchema(EWireType::Int64),
- CreateSimpleTypeSchema(EWireType::Boolean),
- }),
- "[1;%true;]",
- "01" + VariantTagInfix() + "01");
-}
-
-TEST_P(TYsonSkiffConverterTestVariant, TestMalformedVariants)
-{
- auto logicalType = VariantLogicalType({
- Bool(),
- Int64(),
- });
- auto skiffSchema = VariantSkiffSchema({
- CreateSimpleTypeSchema(EWireType::Boolean),
- CreateSimpleTypeSchema(EWireType::Int64),
- });
-
- EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[2; 42]"), "Yson to Skiff conversion error");
- EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[]"), "Yson to Skiff conversion error");
- EXPECT_THROW_WITH_SUBSTRING(ConvertYsonHex(logicalType, skiffSchema, "[0]"), "Yson to Skiff conversion error");
-
- EXPECT_THROW_WITH_SUBSTRING(ConvertHexToTextYson(logicalType, skiffSchema, "02" + VariantTagInfix() + "00"),
- "Skiff to Yson conversion error");
-}
-
-INSTANTIATE_TEST_SUITE_P(
- Variants,
- TYsonSkiffConverterTestVariant,
- ::testing::Combine(
- ::testing::ValuesIn({ELogicalMetatype::VariantStruct, ELogicalMetatype::VariantTuple}),
- ::testing::ValuesIn({EWireType::Variant8, EWireType::Variant16})
- )
-);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/validate_logical_type_ut.cpp b/yt/yt/client/unittests/validate_logical_type_ut.cpp
index 1bebe1c952..dce4792430 100644
--- a/yt/yt/client/unittests/validate_logical_type_ut.cpp
+++ b/yt/yt/client/unittests/validate_logical_type_ut.cpp
@@ -1,4 +1,4 @@
-#include "logical_type_shortcuts.h"
+#include <yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h>
#include <yt/yt/core/test_framework/framework.h>
diff --git a/yt/yt/client/unittests/value_examples.cpp b/yt/yt/client/unittests/value_examples.cpp
deleted file mode 100644
index c2714fc856..0000000000
--- a/yt/yt/client/unittests/value_examples.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-#include "value_examples.h"
-
-#include "logical_type_shortcuts.h"
-
-#include <yt/yt/library/decimal/decimal.h>
-
-#include <cmath>
-
-namespace NYT::NTableClient {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NLogicalTypeShortcuts;
-using namespace NNamedValue;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TValueExample::TValueExample(TLogicalTypePtr logicalType, TNamedValue::TValue value, TString prettyYson)
- : LogicalType(std::move(logicalType))
- , Value(std::move(value))
- , PrettyYson(std::move(prettyYson))
-{ }
-
-////////////////////////////////////////////////////////////////////////////////
-
-std::vector<TValueExample> GetPrimitiveValueExamples()
-{
- static const std::vector<TValueExample> valueExamples = {
- TValueExample{Int8(), 0, "0"},
- TValueExample{Int8(), -5, "-5"},
- TValueExample{Int8(), 42, "42"},
- TValueExample{Int8(), -128, "-128"},
- TValueExample{Int8(), 127, "127"},
-
- TValueExample{Int16(), 0, "0"},
- TValueExample{Int16(), -6, "-6"},
- TValueExample{Int16(), 43, "43"},
- TValueExample{Int16(), 0x7FFF, "32767"},
- TValueExample{Int16(), -0x8000, "-32768"},
-
- TValueExample{Int32(), 0, "0"},
- TValueExample{Int32(), -7, "-7"},
- TValueExample{Int32(), 44, "44"},
- TValueExample{Int32(), 0x7FFFFFFF, "2147483647"},
- TValueExample{Int32(), -0x80000000ll, "-2147483648"},
-
- TValueExample{Int64(), 0, "0"},
- TValueExample{Int64(), -7, "-7"},
- TValueExample{Int64(), 45, "45"},
- TValueExample{Int64(), 0x7FFFFFFFFFFFFFFFll, "9223372036854775807"},
- TValueExample{Int64(), i64(-0x8000000000000000ll), "-9223372036854775808"},
-
- TValueExample{Uint8(), 0ull, "0u"},
- TValueExample{Uint8(), 46ull, "46u"},
- TValueExample{Uint8(), 255ull, "255u"},
-
- TValueExample{Uint16(), 0ull, "0u"},
- TValueExample{Uint16(), 47ull, "47u"},
- TValueExample{Uint16(), 0xFFFFull, "65535u"},
-
- TValueExample{Uint32(), 0ull, "0u"},
- TValueExample{Uint32(), 48ull, "48u"},
- TValueExample{Uint32(), 0xFFFFFFFFull, "4294967295u"},
-
- TValueExample{Uint64(), 0ull, "0u"},
- TValueExample{Uint64(), 49ull, "49u"},
- TValueExample{Uint64(), 0xFFFFFFFFFFFFFFFFull, "18446744073709551615u"},
-
- TValueExample{String(), "", R"("")"},
- TValueExample{String(), "foo", R"("foo")"},
- TValueExample{String(), TString(TStringBuf("\xf0\x00"sv)), R"("\xf0\x00")"},
-
- TValueExample{Utf8(), "", R"("")"},
- TValueExample{Utf8(), "bar", R"("bar")"},
-
- TValueExample{Bool(), true, "%true"},
- TValueExample{Bool(), false, "%false"},
-
- // NB. .125 = 1 / 8 is
- TValueExample{Double(), 3.125, "3.125"},
- TValueExample{Double(), 2.775, "2.775"},
- // TPrimitiveTypeExample{Double(), std::nan("1"), "%nan"},
- TValueExample{Double(), INFINITY, "%inf"},
- TValueExample{Double(), -INFINITY, "%-inf"},
-
- TValueExample{Float(), 5.125, "5.125"},
- TValueExample{Float(), 6.775, "6.775"},
-
- TValueExample{Null(), nullptr, "#"},
- TValueExample{Void(), nullptr, "#"},
-
- TValueExample{Json(), "83", R"("83")"},
- TValueExample{Json(), "[]", R"("[]")"},
-
- TValueExample{
- Uuid(),
- TString(16, 0),
- TString(TStringBuf(R"("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")"))
- },
- TValueExample{
- Uuid(),
- TString(TStringBuf("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10"sv)),
- TString(TStringBuf(R"("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10")"))
- },
-
- TValueExample{Date(), 0ull, "0u"},
- TValueExample{Date(), 18431ull, "18431u"},
- TValueExample{Date(), 49672ull, "49672u"},
-
- TValueExample{Datetime(), 0ull, "0u"},
- TValueExample{Datetime(), 668800588ull, "668800588u"},
- TValueExample{Datetime(), 4291747199ull, "4291747199u"},
-
- TValueExample{Timestamp(), 0ull, "0u"},
- TValueExample{Timestamp(), 2508452463052426ull, "2508452463052426u"},
- TValueExample{Timestamp(), 4291747199999999ull, "4291747199999999u"},
-
- TValueExample{Interval(), 0, "0"},
- TValueExample{Timestamp(), 2208610308646589ll, "2208610308646589"},
- TValueExample{Timestamp(), 1187314596653899ll, "1187314596653899"},
- TValueExample{Timestamp(), 4291747199999999ll, "4291747199999999"},
- TValueExample{Timestamp(), -4291747199999999ll, "-4291747199999999"},
-
- TValueExample{Yson(), "qux", R"("qux")"},
-
- TValueExample{Decimal(3, 2), NDecimal::TDecimal::TextToBinary("3.14", 3, 2), R"("\x80\x00\x01\x3a")"},
- };
-
- THashSet<ESimpleLogicalValueType> allValueTypes;
- for (const auto value : TEnumTraits<ESimpleLogicalValueType>::GetDomainValues()) {
- allValueTypes.insert(value);
- }
- for (const auto& example : valueExamples) {
- if (example.LogicalType->GetMetatype() == ELogicalMetatype::Simple) {
- allValueTypes.erase(example.LogicalType->AsSimpleTypeRef().GetElement());
- }
- }
- if (!allValueTypes.empty()) {
- THROW_ERROR_EXCEPTION("PrimitiveTypeExample variable doesn't contain values: %v",
- allValueTypes);
- }
- return valueExamples;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NTableClient
diff --git a/yt/yt/client/unittests/value_examples.h b/yt/yt/client/unittests/value_examples.h
deleted file mode 100644
index 06644e2cd6..0000000000
--- a/yt/yt/client/unittests/value_examples.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include <yt/yt/library/named_value/named_value.h>
-
-#include <yt/yt/client/table_client/logical_type.h>
-
-namespace NYT::NTableClient {
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TValueExample
-{
- TLogicalTypePtr LogicalType;
- NNamedValue::TNamedValue::TValue Value;
- TString PrettyYson;
-
- TValueExample(TLogicalTypePtr logicalType, NNamedValue::TNamedValue::TValue value, TString prettyYson);
-};
-
-std::vector<TValueExample> GetPrimitiveValueExamples();
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NTableClient
diff --git a/yt/yt/client/unittests/web_json_writer_ut.cpp b/yt/yt/client/unittests/web_json_writer_ut.cpp
deleted file mode 100644
index a440002a8b..0000000000
--- a/yt/yt/client/unittests/web_json_writer_ut.cpp
+++ /dev/null
@@ -1,1570 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/formats/web_json_writer.h>
-
-#include <yt/yt/client/table_client/logical_type.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/schema.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-
-#include <yt/yt/core/json/json_parser.h>
-
-#include <yt/yt/core/ytree/fluent.h>
-
-#include <yt/yt/library/named_value/named_value.h>
-
-#include <limits>
-
-namespace NYT::NFormats {
-namespace {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NConcurrency;
-using namespace NTableClient;
-
-using NNamedValue::MakeRow;
-
-INodePtr ParseJsonToNode(TStringBuf string)
-{
- TBuildingYsonConsumerViaTreeBuilder<INodePtr> builder(EYsonType::Node);
- TMemoryInput stream(string);
-
- // For plain (raw) JSON parsing we need to switch off
- // "smart" attribute analysis and UTF-8 decoding.
- auto config = New<NJson::TJsonFormatConfig>();
- config->EncodeUtf8 = false;
- config->Plain = true;
-
- NJson::ParseJson(&stream, &builder, std::move(config));
- return builder.Finish();
-}
-
-class TWriterForWebJson
- : public ::testing::Test
-{
-protected:
- TNameTablePtr NameTable_ = New<TNameTable>();
- TWebJsonFormatConfigPtr Config_ = New<TWebJsonFormatConfig>();
- TStringStream OutputStream_;
- ISchemalessFormatWriterPtr Writer_;
-
- void CreateStandardWriter(const std::vector<TTableSchemaPtr>& schemas = {New<TTableSchema>()})
- {
- Writer_ = CreateWriterForWebJson(
- Config_,
- NameTable_,
- schemas,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)));
- }
-};
-
-TEST_F(TWriterForWebJson, Simple)
-{
- Config_->MaxAllColumnNamesCount = 2;
-
- CreateStandardWriter();
-
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", 100500u},
- {"column_b", true},
- {"column_c", "row1_c"},
- {RowIndexColumnName, 0},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row2_c"},
- {"column_b", "row2_b"},
- {RowIndexColumnName, 1},
- }).Get(),
- });
- EXPECT_TRUE(written);
- WaitFor(Writer_->Close())
- .ThrowOnError();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"column_a\":{"
- "\"$type\":\"uint64\","
- "\"$value\":\"100500\""
- "},"
- "\"column_b\":{"
- "\"$type\":\"boolean\","
- "\"$value\":\"true\""
- "},"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_c\""
- "}"
- "},"
- "{"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row2_c\""
- "},"
- "\"column_b\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row2_b\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"false\","
- "\"incomplete_all_column_names\":\"true\","
- "\"all_column_names\":["
- "\"column_a\","
- "\"column_b\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, SliceColumnsByMaxCount)
-{
- Config_->MaxSelectedColumnCount = 2;
-
- CreateStandardWriter();
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", "row1_a"},
- {"column_b", "row1_b"},
- {"column_c", "row1_c"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row2_c"},
- {"column_b", "row2_b"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row3_c"},
- }).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"column_a\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_a\""
- "},"
- "\"column_b\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_b\""
- "}"
- "},"
- "{"
- "\"column_b\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row2_b\""
- "}"
- "},"
- "{"
- "}"
- "],"
- "\"incomplete_columns\":\"true\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"column_a\","
- "\"column_b\","
- "\"column_c\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, SliceStrings)
-{
- Config_->FieldWeightLimit = 6;
-
- CreateStandardWriter();
-
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_b", "row1_b"},
- {"column_c", "rooooow1_c"},
- {"column_a", "row1_a"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row2_c"},
- {"column_b", "rooow2_b"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row3_c"},
- }).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"column_b\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_b\""
- "},"
- "\"column_c\":{"
- "\"$incomplete\":true,"
- "\"$type\":\"string\","
- "\"$value\":\"rooooo\""
- "},"
- "\"column_a\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_a\""
- "}"
- "},"
- "{"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row2_c\""
- "},"
- "\"column_b\":{"
- "\"$incomplete\":true,"
- "\"$type\":\"string\","
- "\"$value\":\"rooow2\""
- "}"
- "},"
- "{"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row3_c\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"false\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"column_a\","
- "\"column_b\","
- "\"column_c\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, ReplaceAnyWithNull)
-{
- Config_->FieldWeightLimit = 8;
-
- CreateStandardWriter();
-
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_b", EValueType::Any, "{key=a}"},
- {"column_c", "row1_c"},
- {"column_a", "row1_a"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", EValueType::Any, "{key=aaaaaa}"},
- {"column_b", "row2_b"},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row3_c"},
- }).Get(),
- });
- EXPECT_TRUE(written);
- WaitFor(Writer_->Close())
- .ThrowOnError();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"column_b\":{"
- "\"key\":{"
- "\"$type\":\"string\","
- "\"$value\":\"a\""
- "}"
- "},"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_c\""
- "},"
- "\"column_a\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row1_a\""
- "}"
- "},"
- "{"
- "\"column_c\":{"
- "\"$incomplete\":true,"
- "\"$type\":\"any\","
- "\"$value\":\"\""
- "},"
- "\"column_b\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row2_b\""
- "}"
- "},"
- "{"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"row3_c\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"false\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"column_a\","
- "\"column_b\","
- "\"column_c\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, SkipSystemColumns)
-{
- Config_->SkipSystemColumns = false;
-
- CreateStandardWriter();
-
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {TableIndexColumnName, 0},
- {RowIndexColumnName, 1},
- {TabletIndexColumnName, 2},
- }).Get(),
- });
- EXPECT_TRUE(written);
- WaitFor(Writer_->Close())
- .ThrowOnError();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"$$table_index\":{"
- "\"$type\":\"int64\","
- "\"$value\":\"0\""
- "},"
- "\"$$row_index\":{"
- "\"$type\":\"int64\","
- "\"$value\":\"1\""
- "},"
- "\"$$tablet_index\":{"
- "\"$type\":\"int64\","
- "\"$value\":\"2\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"false\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"$row_index\","
- "\"$table_index\","
- "\"$tablet_index\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, SkipUnregisteredColumns)
-{
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- int keyDId = -1;
- row.AddValue(MakeUnversionedBooleanValue(true, keyDId));
- std::vector<TUnversionedRow> rows = {row.GetRow()};
-
- EXPECT_EQ(true, Writer_->Write(rows));
-
- keyDId = NameTable_->RegisterName("column_d");
-
- rows.clear();
- row.Reset();
- row.AddValue(MakeUnversionedBooleanValue(true, keyDId));
- rows.push_back(row.GetRow());
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close();
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "},"
- "{"
- "\"column_d\":{"
- "\"$type\":\"boolean\","
- "\"$value\":\"true\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"false\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"column_d\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-TEST_F(TWriterForWebJson, SliceColumnsByName)
-{
- Config_->ColumnNames = {
- "column_b",
- "column_c",
- "$tablet_index"};
- Config_->MaxSelectedColumnCount = 2;
- Config_->SkipSystemColumns = false;
-
- CreateStandardWriter();
-
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", 100500u},
- {"column_b", 0.42},
- {"column_c", "abracadabra"},
- {TabletIndexColumnName, 10},
- }).Get(),
- });
- EXPECT_TRUE(written);
- WaitFor(Writer_->Close())
- .ThrowOnError();
- auto result = ParseJsonToNode(OutputStream_.Str());
-
- TString expectedOutput =
- "{"
- "\"rows\":["
- "{"
- "\"column_b\":{"
- "\"$type\":\"double\","
- "\"$value\":\"0.42\""
- "},"
- "\"column_c\":{"
- "\"$type\":\"string\","
- "\"$value\":\"abracadabra\""
- "},"
- "\"$$tablet_index\":{"
- "\"$type\":\"int64\","
- "\"$value\":\"10\""
- "}"
- "}"
- "],"
- "\"incomplete_columns\":\"true\","
- "\"incomplete_all_column_names\":\"false\","
- "\"all_column_names\":["
- "\"$tablet_index\","
- "\"column_a\","
- "\"column_b\","
- "\"column_c\""
- "]"
- "}";
-
- EXPECT_EQ(std::ssize(expectedOutput), Writer_->GetWrittenSize());
- EXPECT_EQ(expectedOutput, OutputStream_.Str());
-}
-
-template <typename TValue>
-void CheckYqlValue(
- const INodePtr& valueNode,
- const TValue& expectedValue)
-{
- using TDecayedValue = std::decay_t<TValue>;
- if constexpr (std::is_convertible_v<TDecayedValue, TString>) {
- ASSERT_EQ(valueNode->GetType(), ENodeType::String);
- EXPECT_EQ(valueNode->GetValue<TString>(), expectedValue);
- } else if constexpr (std::is_same_v<TDecayedValue, double>) {
- ASSERT_EQ(valueNode->GetType(), ENodeType::String);
- EXPECT_FLOAT_EQ(FromString<double>(valueNode->GetValue<TString>()), expectedValue);
- } else if constexpr (std::is_same_v<TDecayedValue, bool>) {
- ASSERT_EQ(valueNode->GetType(), ENodeType::Boolean);
- EXPECT_EQ(valueNode->GetValue<bool>(), expectedValue);
- } else if constexpr (std::is_same_v<TDecayedValue, INodePtr>) {
- EXPECT_TRUE(AreNodesEqual(valueNode, expectedValue))
- << "actualValueNode is " << ConvertToYsonString(valueNode, EYsonFormat::Pretty).AsStringBuf()
- << "\nexpectedValue is " << ConvertToYsonString(expectedValue, EYsonFormat::Pretty).AsStringBuf();
- } else {
- static_assert(TDependentFalse<TDecayedValue>, "Type not allowed");
- }
-}
-
-template <typename TType>
-void CheckYqlType(
- const INodePtr& typeNode,
- const TType& expectedType,
- const std::vector<INodePtr>& yqlTypes)
-{
- ASSERT_EQ(typeNode->GetType(), ENodeType::String);
- auto typeIndexString = typeNode->GetValue<TString>();
- auto typeIndex = FromString<int>(typeIndexString);
- ASSERT_LT(typeIndex, static_cast<int>(yqlTypes.size()));
- ASSERT_GE(typeIndex, 0);
- const auto& yqlType = yqlTypes[typeIndex];
- EXPECT_EQ(yqlType->GetType(), ENodeType::List);
-
- auto expectedTypeNode = [&] () -> INodePtr {
- using TDecayedType = std::decay_t<TType>;
- if constexpr (std::is_convertible_v<TDecayedType, TString>) {
- return ConvertToNode(TYsonString(TString(expectedType)));
- } else if constexpr (std::is_same_v<TDecayedType, INodePtr>) {
- return expectedType;
- } else {
- static_assert(TDependentFalse<TDecayedType>, "Type not allowed");
- }
- }();
- EXPECT_TRUE(AreNodesEqual(yqlType, expectedTypeNode))
- << "yqlType is " << ConvertToYsonString(yqlType, EYsonFormat::Pretty).AsStringBuf()
- << "\nexpectedTypeNode is " << ConvertToYsonString(expectedTypeNode, EYsonFormat::Pretty).AsStringBuf();
-}
-
-template <typename TValue, typename TType>
-void CheckYqlTypeAndValue(
- const INodePtr& row,
- TStringBuf name,
- const TType& expectedType,
- const TValue& expectedValue,
- const std::vector<INodePtr>& yqlTypes)
-{
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto entry = row->AsMap()->FindChild(TString(name));
- ASSERT_TRUE(entry);
- ASSERT_EQ(entry->GetType(), ENodeType::List);
- ASSERT_EQ(entry->AsList()->GetChildCount(), 2);
- auto valueNode = entry->AsList()->GetChildOrThrow(0);
- CheckYqlValue(valueNode, expectedValue);
- auto typeNode = entry->AsList()->GetChildOrThrow(1);
- CheckYqlType(typeNode, expectedType, yqlTypes);
-}
-
-#define CHECK_YQL_TYPE_AND_VALUE(row, name, expectedType, expectedValue, yqlTypes) \
- do { \
- SCOPED_TRACE(name); \
- CheckYqlTypeAndValue(row, name, expectedType, expectedValue, yqlTypes); \
- } while (0)
-
-TEST_F(TWriterForWebJson, YqlValueFormat_SimpleTypes)
-{
- Config_->MaxAllColumnNamesCount = 2;
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
-
- // We will emulate writing rows from two tables.
- CreateStandardWriter(std::vector{New<TTableSchema>(), New<TTableSchema>()});
-
- {
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", 100500u},
- {"column_b", true},
- {"column_c", "row1_c"},
- {RowIndexColumnName, 0},
- {TableIndexColumnName, 0},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_c", "row2_c"},
- {"column_b", "row2_b"},
- {RowIndexColumnName, 1},
- {TableIndexColumnName, 0},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_a", -100500},
- {"column_b", EValueType::Any, "{x=2;y=3}"},
- {"column_c", 2.71828},
- {RowIndexColumnName, 1},
- }).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
- ASSERT_TRUE(incompleteColumns);
- auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
- ASSERT_TRUE(incompleteAllColumnNames);
- auto allColumnNames = result->AsMap()->FindChild("all_column_names");
- ASSERT_TRUE(allColumnNames);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(incompleteColumns->GetType(), ENodeType::String);
- EXPECT_EQ(incompleteColumns->GetValue<TString>(), "false");
-
- ASSERT_EQ(incompleteAllColumnNames->GetType(), ENodeType::String);
- EXPECT_EQ(incompleteAllColumnNames->GetValue<TString>(), "true");
-
- ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
- std::vector<TString> allColumnNamesVector;
- ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
- EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_b"}));
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 3);
-
- auto row1 = rows->AsList()->GetChildOrThrow(0);
- auto row2 = rows->AsList()->GetChildOrThrow(1);
- auto row3 = rows->AsList()->GetChildOrThrow(2);
-
- ASSERT_EQ(row1->GetType(), ENodeType::Map);
- EXPECT_EQ(row1->AsMap()->GetChildCount(), 3);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", R"(["DataType"; "Uint64"])", "100500", yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_b", R"(["DataType"; "Boolean"])", true, yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_c", R"(["DataType"; "String"])", "row1_c", yqlTypes);
-
- ASSERT_EQ(row2->GetType(), ENodeType::Map);
- EXPECT_EQ(row2->AsMap()->GetChildCount(), 2);
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_b", R"(["DataType"; "String"])", "row2_b", yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_c", R"(["DataType"; "String"])", "row2_c", yqlTypes);
-
- ASSERT_EQ(row3->GetType(), ENodeType::Map);
- EXPECT_EQ(row3->AsMap()->GetChildCount(), 3);
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_a", R"(["DataType"; "Int64"])", "-100500", yqlTypes);
- auto row3BValue = ConvertToNode(TYsonString(TStringBuf(R"({
- val = {
- x = {
- "$type" = "int64";
- "$value" = "2";
- };
- y = {
- "$type" = "int64";
- "$value" = "3";
- }
- }
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_b", R"(["DataType"; "Yson"])", row3BValue, yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_c", R"(["DataType"; "Double"])", 2.71828, yqlTypes);
-}
-
-TEST_F(TWriterForWebJson, ColumnNameEncoding)
-{
- Config_->MaxAllColumnNamesCount = 2;
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
-
- CreateStandardWriter();
-
- {
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", 100500u},
- {"column_non_ascii_\xd0\x81", -100500},
- }).Get()
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
- ASSERT_TRUE(incompleteColumns);
- auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
- ASSERT_TRUE(incompleteAllColumnNames);
- auto allColumnNames = result->AsMap()->FindChild("all_column_names");
- ASSERT_TRUE(allColumnNames);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
- std::vector<TString> allColumnNamesVector;
- ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
- EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_non_ascii_\xc3\x90\xc2\x81"}));
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
-
- auto row1 = rows->AsList()->GetChildOrThrow(0);
-
- ASSERT_EQ(row1->GetType(), ENodeType::Map);
- EXPECT_EQ(row1->AsMap()->GetChildCount(), 2);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", R"(["DataType"; "Uint64"])", "100500", yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_non_ascii_\xc3\x90\xc2\x81", R"(["DataType"; "Int64"])", "-100500", yqlTypes);
-}
-
-TEST_F(TWriterForWebJson, YqlValueFormat_ComplexTypes)
-{
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
-
- auto firstSchema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"column_a", OptionalLogicalType(
- ListLogicalType(MakeLogicalType(ESimpleLogicalValueType::Int64, true)))},
- {"column_b", StructLogicalType({
- {"key", MakeLogicalType(ESimpleLogicalValueType::String, true)},
- {"value", MakeLogicalType(ESimpleLogicalValueType::String, true)},
- {"variant_tuple", VariantTupleLogicalType({
- MakeLogicalType(ESimpleLogicalValueType::Int8, true),
- MakeLogicalType(ESimpleLogicalValueType::Boolean, false),
- })},
- {"variant_struct", VariantStructLogicalType({
- {"a", MakeLogicalType(ESimpleLogicalValueType::Int8, true)},
- {"b", MakeLogicalType(ESimpleLogicalValueType::Boolean, false)},
- })},
- {"dict", DictLogicalType(
- SimpleLogicalType(ESimpleLogicalValueType::Int64),
- SimpleLogicalType(ESimpleLogicalValueType::String)
- )},
- {"tagged", TaggedLogicalType(
- "MyTag",
- SimpleLogicalType(ESimpleLogicalValueType::Int64)
- )},
- {"timestamp", SimpleLogicalType(ESimpleLogicalValueType::Timestamp)},
- {"date", SimpleLogicalType(ESimpleLogicalValueType::Date)},
- {"datetime", SimpleLogicalType(ESimpleLogicalValueType::Datetime)},
- {"interval", SimpleLogicalType(ESimpleLogicalValueType::Interval)},
- {"json", SimpleLogicalType(ESimpleLogicalValueType::Json)},
- {"float", SimpleLogicalType(ESimpleLogicalValueType::Float)},
- })},
- {"column_c", ListLogicalType(StructLogicalType({
- {"very_optional_key", OptionalLogicalType(MakeLogicalType(ESimpleLogicalValueType::String, false))},
- {"optional_value", MakeLogicalType(ESimpleLogicalValueType::String, false)},
- }))},
- });
-
- auto secondSchema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"column_a", VariantTupleLogicalType({
- SimpleLogicalType(ESimpleLogicalValueType::Null),
- SimpleLogicalType(ESimpleLogicalValueType::Any),
- })},
- {"column_b", SimpleLogicalType(ESimpleLogicalValueType::Null)},
- {"column_c", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Null))},
- {"column_d", OptionalLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Int64))},
- });
-
- auto firstColumnAType = ConvertToNode(TYsonString(TStringBuf(R"([
- "OptionalType";
- [
- "ListType";
- ["DataType"; "Int64"]
- ]
- ])")));
- auto firstColumnBType = ConvertToNode(TYsonString(TStringBuf(R"([
- "StructType";
- [
- [
- "key";
- ["DataType"; "String"]
- ];
- [
- "value";
- ["DataType"; "String"]
- ];
- [
- "variant_tuple";
- [
- "VariantType";
- [
- "TupleType";
- [
- ["DataType"; "Int8"];
- [
- "OptionalType";
- ["DataType"; "Boolean"]
- ]
- ]
- ]
- ]
- ];
- [
- "variant_struct";
- [
- "VariantType";
- [
- "StructType";
- [
- [
- "a";
- ["DataType"; "Int8"]
- ];
- [
- "b";
- [
- "OptionalType";
- ["DataType"; "Boolean"]
- ]
- ]
- ]
- ]
- ]
- ];
- [
- "dict";
- [
- "DictType";
- ["DataType"; "Int64"];
- ["DataType"; "String"]
- ]
- ];
- [
- "tagged";
- [
- "TaggedType";
- "MyTag";
- ["DataType"; "Int64"]
- ]
- ];
- [
- "timestamp";
- ["DataType"; "Timestamp"]
- ];
- [
- "date";
- ["DataType"; "Date"]
- ];
- [
- "datetime";
- ["DataType"; "Datetime"]
- ];
- [
- "interval";
- ["DataType"; "Interval"]
- ];
- [
- "json";
- ["DataType"; "Json"]
- ];
- [
- "float";
- ["DataType"; "Float"]
- ];
- ]
- ])")));
- auto firstColumnCType = ConvertToNode(TYsonString(TStringBuf(R"([
- "ListType";
- [
- "StructType";
- [
- [
- "very_optional_key";
- [
- "OptionalType";
- [
- "OptionalType";
- ["DataType"; "String"]
- ]
- ]
- ];
- [
- "optional_value";
- [
- "OptionalType";
- ["DataType"; "String"]
- ]
- ]
- ]
- ]
- ])")));
- auto secondColumnAType = ConvertToNode(TYsonString(TStringBuf(R"([
- "VariantType";
- [
- "TupleType";
- [
- ["NullType"];
- ["DataType"; "Yson"];
- ]
- ]
- ])")));
- auto secondColumnBType = ConvertToNode(TYsonString(TStringBuf(R"(["NullType"])")));
- auto secondColumnCType = ConvertToNode(TYsonString(TStringBuf(R"([
- "OptionalType";
- [
- "NullType";
- ]
- ])")));
- auto secondColumnDType = ConvertToNode(TYsonString(TStringBuf(R"([
- "OptionalType";
- ["DataType"; "Int64"]
- ])")));
-
- CreateStandardWriter(std::vector{firstSchema, secondSchema});
- {
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {"column_a", EValueType::Composite, R"([-1; -2; -5])"},
- {
- "column_b",
- EValueType::Composite,
- R"([
- "key";
- "value";
- [0; 7];
- [1; #];
- [[1; "a"]; [2; "b"]];
- 99;
- 100u;
- 101u;
- 102u;
- 103;
- "[\"a\", {\"b\": 42}]";
- -3.25;
- ])",
- },
- {"column_c", EValueType::Composite, R"([[[#]; "value"]; [["key"]; #]])"},
- {"column_d", -49},
- {TableIndexColumnName, 0},
- {RowIndexColumnName, 0},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_a", EValueType::Composite, R"([0; -2; -5; 177])"},
- {
- "column_b",
- EValueType::Composite,
- R"([
- "key1";
- "value1";
- [1; %false];
- [1; #];
- [];
- 199;
- 0u;
- 1101u;
- 1102u;
- 1103;
- "null";
- 0.0;
- ])",
- },
- {"column_c", EValueType::Composite, R"([[#; #]; [["key1"]; #]])"},
- {"column_d", 49u},
- {RowIndexColumnName, 1},
- }).Get(),
- MakeRow(NameTable_, {
- {"column_a", EValueType::Composite, "[]"},
- {
- "column_b",
- EValueType::Composite,
- R"([
- "key2";
- "value2";
- [0; 127];
- [1; %true];
- [[0; ""]];
- 399;
- 30u;
- 3101u;
- 3202u;
- 3103;
- "{\"x\": false}";
- 1e10;
- ])"
- },
- {"column_c", EValueType::Composite, "[[[key]; #]]"},
- {"column_d", "49"},
- {RowIndexColumnName, 2},
- }).Get(),
-
- MakeRow(NameTable_, {
- {"column_a", nullptr},
- {
- "column_b",
- EValueType::Composite,
- // First string is valid UTF-8, the second one should be Base64 encoded.
- "["
- "\"\xC3\xBF\";"
- "\"\xFA\xFB\xFC\xFD\";"
- R"(
- [0; 127];
- [1; %true];
- [[-1; "-1"]; [0; ""]];
- 499;
- 40u;
- 4101u;
- 4202u;
- 4103;
- "{}";
- -2.125;
- ])",
- },
- {"column_c", EValueType::Composite, "[]"},
- {"column_d", EValueType::Any, "{x=49}"},
- {RowIndexColumnName, 3},
- }).Get(),
-
- // Here come rows from the second table.
- MakeRow(NameTable_, {
- {"column_a", EValueType::Composite, "[0; #]"},
- {"column_b", nullptr},
- {"column_c", nullptr},
- {"column_d", -49},
- {TableIndexColumnName, 1},
- {RowIndexColumnName, 0},
- }).Get(),
-
- MakeRow(NameTable_, {
- {"column_a", EValueType::Composite, "[1; {z=z}]"},
- {"column_b", nullptr},
- {"column_c", EValueType::Composite, "[#]"},
- {"column_d", nullptr},
- {TableIndexColumnName, 1},
- {RowIndexColumnName, 1},
- }).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto incompleteColumns = result->AsMap()->FindChild("incomplete_columns");
- ASSERT_TRUE(incompleteColumns);
- auto incompleteAllColumnNames = result->AsMap()->FindChild("incomplete_all_column_names");
- ASSERT_TRUE(incompleteAllColumnNames);
- auto allColumnNames = result->AsMap()->FindChild("all_column_names");
- ASSERT_TRUE(allColumnNames);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(incompleteColumns->GetType(), ENodeType::String);
- EXPECT_EQ(incompleteColumns->GetValue<TString>(), "false");
-
- ASSERT_EQ(incompleteAllColumnNames->GetType(), ENodeType::String);
- EXPECT_EQ(incompleteAllColumnNames->GetValue<TString>(), "false");
-
- ASSERT_EQ(allColumnNames->GetType(), ENodeType::List);
- std::vector<TString> allColumnNamesVector;
- ASSERT_NO_THROW(allColumnNamesVector = ConvertTo<decltype(allColumnNamesVector)>(allColumnNames));
- EXPECT_EQ(allColumnNamesVector, (std::vector<TString>{"column_a", "column_b", "column_c", "column_d"}));
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 6);
-
- auto row1 = rows->AsList()->GetChildOrThrow(0);
- auto row2 = rows->AsList()->GetChildOrThrow(1);
- auto row3 = rows->AsList()->GetChildOrThrow(2);
- auto row4 = rows->AsList()->GetChildOrThrow(3);
- auto row5 = rows->AsList()->GetChildOrThrow(4);
- auto row6 = rows->AsList()->GetChildOrThrow(5);
-
- ASSERT_EQ(row1->GetType(), ENodeType::Map);
- EXPECT_EQ(row1->AsMap()->GetChildCount(), 4);
- auto row1AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=["-1"; "-2"; "-5"]}])")));
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_a", firstColumnAType, row1AValue, yqlTypes);
- auto row1BValue = ConvertToNode(TYsonString(TStringBuf(
- R"([
- "key";
- "value";
- ["0"; "7"];
- ["1"; #];
- {"val"=[["1"; "a"]; ["2"; "b"]]};
- "99";
- "100";
- "101";
- "102";
- "103";
- "[\"a\", {\"b\": 42}]";
- "-3.25";
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_b", firstColumnBType, row1BValue, yqlTypes);
- auto row1CValue = ConvertToNode(TYsonString(TStringBuf(R"({
- "val"=[
- [[#]; ["value"]];
- [[["key"]]; #]
- ]
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_c", firstColumnCType, row1CValue, yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row1, "column_d", R"(["DataType"; "Int64"])", "-49", yqlTypes);
-
- ASSERT_EQ(row2->GetType(), ENodeType::Map);
- EXPECT_EQ(row2->AsMap()->GetChildCount(), 4);
- auto row2AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=["0"; "-2"; "-5"; "177"]}])")));
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_a", firstColumnAType, row2AValue, yqlTypes);
- auto row2BValue = ConvertToNode(TYsonString(TStringBuf(
- R"([
- "key1";
- "value1";
- ["1"; [%false]];
- ["1"; #];
- {"val"=[]};
- "199";
- "0";
- "1101";
- "1102";
- "1103";
- "null";
- "0";
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_b", firstColumnBType, row2BValue, yqlTypes);
- auto row2CValue = ConvertToNode(TYsonString(TStringBuf(R"({
- "val"=[
- [#; #];
- [[["key1"]]; #]
- ]
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_c", firstColumnCType, row2CValue, yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row2, "column_d", R"(["DataType"; "Uint64"])", "49", yqlTypes);
-
- ASSERT_EQ(row3->GetType(), ENodeType::Map);
- EXPECT_EQ(row3->AsMap()->GetChildCount(), 4);
- auto row3AValue = ConvertToNode(TYsonString(TStringBuf(R"([{"val"=[]}])")));
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_a", firstColumnAType, row3AValue, yqlTypes);
- auto row3BValue = ConvertToNode(TYsonString(TStringBuf(
- R"([
- "key2";
- "value2";
- ["0"; "127"];
- ["1"; [%true]];
- {"val"=[["0"; ""]]};
- "399";
- "30";
- "3101";
- "3202";
- "3103";
- "{\"x\": false}";
- "10000000000";
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_b", firstColumnBType, row3BValue, yqlTypes);
- auto row3CValue = ConvertToNode(TYsonString(TStringBuf(R"({
- "val"=[
- [[["key"]]; #]
- ]
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_c", firstColumnCType, row3CValue, yqlTypes);
- CHECK_YQL_TYPE_AND_VALUE(row3, "column_d", R"(["DataType"; "String"])", "49", yqlTypes);
-
- ASSERT_EQ(row4->GetType(), ENodeType::Map);
- EXPECT_EQ(row4->AsMap()->GetChildCount(), 4);
- auto row4AValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
- CHECK_YQL_TYPE_AND_VALUE(row4, "column_a", firstColumnAType, row4AValue, yqlTypes);
-
- auto row4BValue = ConvertToNode(TYsonString(TStringBuf(
- "["
- "\"\xC3\xBF\";"
- R"(
- {"b64" = %true; "val" = "+vv8/Q=="};
- ["0"; "127"];
- ["1"; [%true]];
- {"val"=[["-1"; "-1"]; ["0"; ""]]};
- "499";
- "40";
- "4101";
- "4202";
- "4103";
- "{}";
- "-2.125";
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row4, "column_b", firstColumnBType, row4BValue, yqlTypes);
-
- auto row4CValue = ConvertToNode(TYsonString(TStringBuf(R"({"val"=[]})")));
- CHECK_YQL_TYPE_AND_VALUE(row4, "column_c", firstColumnCType, row4CValue, yqlTypes);
- auto row4DValue = ConvertToNode(TYsonString(TStringBuf(R"({
- val = {
- x = {
- "$type" = "int64";
- "$value" = "49";
- }
- }
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row4, "column_d", R"(["DataType"; "Yson"])", row4DValue, yqlTypes);
-
- // Here must come rows from the second table.
-
- ASSERT_EQ(row5->GetType(), ENodeType::Map);
- EXPECT_EQ(row5->AsMap()->GetChildCount(), 4);
- auto row5AValue = ConvertToNode(TYsonString(TStringBuf(R"(["0"; #])")));
- CHECK_YQL_TYPE_AND_VALUE(row5, "column_a", secondColumnAType, row5AValue, yqlTypes);
- auto row5BValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
- CHECK_YQL_TYPE_AND_VALUE(row5, "column_b", secondColumnBType, row5BValue, yqlTypes);
- auto row5CValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
- CHECK_YQL_TYPE_AND_VALUE(row5, "column_c", secondColumnCType, row5CValue, yqlTypes);
- auto row5DValue = ConvertToNode(TYsonString(TStringBuf(R"(["-49"])")));
- CHECK_YQL_TYPE_AND_VALUE(row5, "column_d", secondColumnDType, row5DValue, yqlTypes);
-
- ASSERT_EQ(row6->GetType(), ENodeType::Map);
- EXPECT_EQ(row6->AsMap()->GetChildCount(), 4);
- auto row6AValue = ConvertToNode(TYsonString(TStringBuf(R"([
- "1";
- {
- val = {
- z = {
- "$type" = "string";
- "$value" = "z";
- }
- }
- };
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row6, "column_a", secondColumnAType, row6AValue, yqlTypes);
- auto row6BValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
- CHECK_YQL_TYPE_AND_VALUE(row6, "column_b", secondColumnBType, row6BValue, yqlTypes);
- auto row6CValue = ConvertToNode(TYsonString(TStringBuf(R"([#])")));
- CHECK_YQL_TYPE_AND_VALUE(row6, "column_c", secondColumnCType, row6CValue, yqlTypes);
- auto row6DValue = ConvertToNode(TYsonString(TStringBuf(R"(#)")));
- CHECK_YQL_TYPE_AND_VALUE(row6, "column_d", secondColumnDType, row6DValue, yqlTypes);
-}
-
-TEST_F(TWriterForWebJson, YqlValueFormat_Incomplete)
-{
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
- Config_->FieldWeightLimit = 215;
- Config_->StringWeightLimit = 10;
-
- auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"column_a", StructLogicalType({
- {"field1", SimpleLogicalType(ESimpleLogicalValueType::Int64)},
- {"list", ListLogicalType(
- VariantStructLogicalType({
- {"a", DictLogicalType(
- SimpleLogicalType(ESimpleLogicalValueType::Int64),
- SimpleLogicalType(ESimpleLogicalValueType::String)
- )},
- {"b", SimpleLogicalType(ESimpleLogicalValueType::Any)},
- })
- )},
- {"field2", SimpleLogicalType(ESimpleLogicalValueType::String)},
- {"field3", MakeLogicalType(ESimpleLogicalValueType::Int64, false)},
- })},
- {"column_b", SimpleLogicalType(ESimpleLogicalValueType::Any)},
- {"column_c", MakeLogicalType(ESimpleLogicalValueType::String, false)},
- });
-
- auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"([
- "StructType";
- [
- [
- "field1";
- ["DataType"; "Int64"]
- ];
- [
- "list";
- [
- "ListType";
- [
- "VariantType";
- [
- "StructType";
- [
- [
- "a";
- [
- "DictType";
- ["DataType"; "Int64"];
- ["DataType"; "String"]
- ]
- ];
- [
- "b";
- ["DataType"; "Yson"]
- ];
- ]
- ]
- ]
- ]
- ];
- [
- "field2";
- ["DataType"; "String"]
- ];
- [
- "field3";
- [
- "OptionalType";
- ["DataType"; "Int64"]
- ]
- ];
- ]
- ])")));
-
- auto yqlTypeB = ConvertToNode(TYsonString(TStringBuf(R"(["DataType"; "Yson"])")));
- auto yqlTypeC = ConvertToNode(TYsonString(TStringBuf(R"(["OptionalType"; ["DataType"; "String"]])")));
- {
- CreateStandardWriter({schema});
- bool written = Writer_->Write({
- MakeRow(NameTable_, {
- {
- "column_a",
- EValueType::Composite,
- R"([
- -1;
- [
- [
- 0;
- [
- [-2; "UTF:)" + TString("\xF0\x90\x8D\x88") + "\xF0\x90\x8D\x88" + R"("];
- [2; "!UTF:)" + TString("\xFA\xFB\xFC\xFD\xFA\xFB\xFC\xFD") + R"("];
- [0; ""];
- ]
- ];
- [
- 1;
- "{kinda_long_key = kinda_even_longer_value}"
- ];
- [
- 0;
- [
- [0; "One more quite long string"];
- [1; "One more quite long string"];
- [2; "One more quite long string"];
- [3; "One more quite long string"];
- [4; "One more quite long string"];
- [5; "One more quite long string"];
- ]
- ];
- [
- 1;
- "{kinda_long_key = kinda_even_longer_value}"
- ];
- ];
- "I'm short";
- 424242238133245
- ])"
- },
- {"column_b", EValueType::Any, "{kinda_long_key = kinda_even_longer_value}"},
- {"column_c", "One more quite long string"},
- }).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
-
- auto row = rows->AsList()->GetChildOrThrow(0);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- EXPECT_EQ(row->AsMap()->GetChildCount(), 3);
-
- auto rowAValue = ConvertToNode(TYsonString(R"([
- "-1";
- {
- "inc" = %true;
- "val" = [
- [
- "0";
- {
- "val" = [
- ["-2"; {"inc"=%true; "val"="UTF:)" + TString("\xF0\x90\x8D\x88") + R"("}];
- ["2"; {"inc"=%true; "b64"=%true; "val"="IVVURjr6"}];
- ["0"; ""];
- ]
- }
- ];
- [
- "1";
- {"val"=""; "inc"=%true}
- ];
- [
- "0";
- {
- "inc" = %true;
- "val" = [
- ["0"; {"val"="One more q"; "inc"=%true}];
- ["1"; {"val"="One more "; "inc"=%true}];
- ];
- }
- ];
- ];
- };
- {
- "val" = "";
- "inc" = %true;
- };
- ["424242238133245"];
- ])"));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
-
- // Simple values are not truncated to |StringWeightLimit|
- auto rowBValue = ConvertToNode(TYsonString(TStringBuf(R"({
- val = {
- kinda_long_key = {
- "$type" = "string";
- "$value" = kinda_even_longer_value;
- }
- }
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_b", yqlTypeB, rowBValue, yqlTypes);
- auto rowCValue = ConvertToNode(TYsonString(TStringBuf(R"(["One more quite long string"])")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_c", yqlTypeC, rowCValue, yqlTypes);
-}
-
-
-TEST_F(TWriterForWebJson, YqlValueFormat_Any)
-{
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
-
- auto schema = New<TTableSchema>(std::vector<TColumnSchema>{
- {"column_a", MakeLogicalType(ESimpleLogicalValueType::Any, false)},
- });
-
- auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"([
- "OptionalType";
- ["DataType"; "Yson"]
- ])")));
-
- CreateStandardWriter({schema});
- {
- bool written = Writer_->Write({
- MakeRow(NameTable_, {{"column_a", EValueType::Any, "{x=y;z=2}"}}).Get(),
- MakeRow(NameTable_, {{"column_a", true}}).Get(),
- MakeRow(NameTable_, {{"column_a", -42}}).Get(),
- MakeRow(NameTable_, {{"column_a", 42u}}).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 4);
-
- {
- auto row = rows->AsList()->GetChildOrThrow(0);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
- {
- val = {
- x = {
- "$type" = "string";
- "$value" = "y";
- };
- z = {
- "$type" = "int64";
- "$value" = "2";
- }
- }
- }
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
- }
- {
- auto row = rows->AsList()->GetChildOrThrow(1);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
- {
- val = {
- "$type" = "boolean";
- "$value" = "true";
- }
- }
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
- }
- {
- auto row = rows->AsList()->GetChildOrThrow(2);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
- {
- val = {
- "$type" = "int64";
- "$value" = "-42";
- }
- }
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
- }
- {
- auto row = rows->AsList()->GetChildOrThrow(3);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"([
- {
- val = {
- "$type" = "uint64";
- "$value" = "42";
- }
- }
- ])")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
- }
-}
-
-TEST_F(TWriterForWebJson, YqlValueFormat_CompositeNoSchema)
-{
- Config_->ValueFormat = EWebJsonValueFormat::Yql;
-
- auto schema = New<TTableSchema>();
-
- auto yqlTypeA = ConvertToNode(TYsonString(TStringBuf(R"(["DataType"; "Yson"])")));
-
- CreateStandardWriter({schema});
- {
- bool written = Writer_->Write({
- MakeRow(NameTable_, {{"column_a", EValueType::Composite, "[1;2]"}}).Get(),
- });
- EXPECT_TRUE(written);
- Writer_->Close().Get().ThrowOnError();
- }
-
- auto result = ParseJsonToNode(OutputStream_.Str());
- ASSERT_EQ(result->GetType(), ENodeType::Map);
-
- auto rows = result->AsMap()->FindChild("rows");
- ASSERT_TRUE(rows);
- auto yqlTypeRegistry = result->AsMap()->FindChild("yql_type_registry");
- ASSERT_TRUE(yqlTypeRegistry);
-
- ASSERT_EQ(yqlTypeRegistry->GetType(), ENodeType::List);
- auto yqlTypes = ConvertTo<std::vector<INodePtr>>(yqlTypeRegistry);
-
- ASSERT_EQ(rows->GetType(), ENodeType::List);
- ASSERT_EQ(rows->AsList()->GetChildCount(), 1);
-
- {
- auto row = rows->AsList()->GetChildOrThrow(0);
- ASSERT_EQ(row->GetType(), ENodeType::Map);
- auto rowAValue = ConvertToNode(TYsonString(TStringBuf(R"({
- "val" = [
- {
- "$type" = "int64";
- "$value" = "1";
- };
- {
- "$type" = "int64";
- "$value" = "2";
- }
- ]
- })")));
- CHECK_YQL_TYPE_AND_VALUE(row, "column_a", yqlTypeA, rowAValue, yqlTypes);
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/ya.make b/yt/yt/client/unittests/ya.make
index ab9f547e19..747cfe2aa8 100644
--- a/yt/yt/client/unittests/ya.make
+++ b/yt/yt/client/unittests/ya.make
@@ -7,8 +7,6 @@ ALLOCATOR(YT)
PROTO_NAMESPACE(yt)
SRCS(
- protobuf_format_ut.proto
-
check_schema_compatibility_ut.cpp
check_type_compatibility_ut.cpp
chunk_replica_ut.cpp
@@ -16,8 +14,6 @@ SRCS(
comparator_ut.cpp
composite_compare_ut.cpp
connection_ut.cpp
- dsv_parser_ut.cpp
- dsv_writer_ut.cpp
farm_fingerprint_stability_ut.cpp
key_bound_ut.cpp
key_bound_compressor_ut.cpp
@@ -28,30 +24,17 @@ SRCS(
uuid_text_ut.cpp
time_text_ut.cpp
node_directory_ut.cpp
- protobuf_format_ut.cpp
query_builder_ut.cpp
read_limit_ut.cpp
replication_progress_ut.cpp
- row_helpers.cpp
row_ut.cpp
- schemaful_dsv_parser_ut.cpp
- schemaful_dsv_writer_ut.cpp
schema_ut.cpp
- skiff_format_ut.cpp
- skiff_yson_converter_ut.cpp
table_consumer_ut.cpp
unordered_reader_ut.cpp
unversioned_row_ut.cpp
validate_logical_type_ut.cpp
- value_examples.cpp
- web_json_writer_ut.cpp
wire_protocol_ut.cpp
- yamred_dsv_parser_ut.cpp
- yamred_dsv_writer_ut.cpp
- yamr_parser_ut.cpp
- yamr_writer_ut.cpp
ypath_ut.cpp
- yson_helpers.cpp
zookeeper_bus_ut.cpp
zookeeper_protocol_ut.cpp
)
diff --git a/yt/yt/client/unittests/yamr_parser_ut.cpp b/yt/yt/client/unittests/yamr_parser_ut.cpp
deleted file mode 100644
index 74b8f530a1..0000000000
--- a/yt/yt/client/unittests/yamr_parser_ut.cpp
+++ /dev/null
@@ -1,606 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/test_framework/yson_consumer_mock.h>
-
-#include <yt/yt/client/formats/yamr_parser.h>
-
-#include <yt/yt/core/yson/null_consumer.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NYson;
-
-using ::testing::InSequence;
-using ::testing::StrictMock;
-using ::testing::NiceMock;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamrParserTest, Simple)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginAttributes());
- EXPECT_CALL(Mock, OnKeyedItem("table_index"));
- EXPECT_CALL(Mock, OnInt64Scalar(2));
- EXPECT_CALL(Mock, OnEndAttributes());
- EXPECT_CALL(Mock, OnEntity());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "key1\tvalue1\n"
- "2\n"
- "key2\tvalue2\n";
-
- ParseYamr(input, &Mock);
-}
-
-TEST(TYamrParserTest, ValueWithTabs)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar(TStringBuf("key1\0", 5)));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value with \t and some other"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar(TStringBuf("another\0 value with \t", 21)));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input(
- "key1\0\tvalue with \t and some other\n"
- "key2\tanother\0 value with \t\n",
- 34 +
- 27);
-
- ParseYamr(input, &Mock);
-}
-
-TEST(TYamrParserTest, SimpleWithSubkey)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "key1\tsubkey1\tvalue1\n"
- "key2\tsubkey2\tvalue2\n";
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrParserTest, IncompleteRows)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "key1\tsubkey1\tvalue1\n"
- "key\tsubkey\n"
- "key2\tsubkey2\tvalue2\n";
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrParserTest, IncorrectIncompleteRows)
-{
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = false;
-
- EXPECT_THROW(ParseYamr("\n", GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr("key\n", GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr("key\tvalue\nkey\n", GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TYamrParserTest, TabsInValue)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("a\tb\\tc\t"));
- EXPECT_CALL(Mock, OnEndMap());
-
- auto config = New<TYamrFormatConfig>();
- TString input = "key\ta\tb\\tc\t";
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrParserTest, Escaping)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("\tkey\t"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("\n"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("a\tb\t\n"));
- EXPECT_CALL(Mock, OnEndMap());
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
- config->EnableEscaping = true;
-
- TString input = "\\tkey\\t\t\\n\ta\tb\t\\n\n";
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrParserTest, CustomSeparators)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- auto config = New<TYamrFormatConfig>();
- config->RecordSeparator = 'Y';
- config->FieldSeparator = 'X';
-
- TString input = "keyXvalueYkey2Xvalue2Y";
- ParseYamr(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamrLenvalParserTest, Simple)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginAttributes());
- EXPECT_CALL(Mock, OnKeyedItem("table_index"));
- EXPECT_CALL(Mock, OnInt64Scalar(1));
- EXPECT_CALL(Mock, OnEndAttributes());
- EXPECT_CALL(Mock, OnEntity());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
- , 2 * (2 * 4 + 4 + 6) + 8 // all i32 + lengths of keys
- );
-
- auto config = New<TYamrFormatConfig>();
- config->Lenval = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrLenvalParserTest, SimpleWithSubkey)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("subkey2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x04\x00\x00\x00" "key1"
- "\x07\x00\x00\x00" "subkey1"
- "\x06\x00\x00\x00" "value1"
-
- "\x04\x00\x00\x00" "key2"
- "\x07\x00\x00\x00" "subkey2"
- "\x06\x00\x00\x00" "value2"
- , 2 * (3 * 4 + 4 + 7 + 6) // all i32 + lengths of keys
- );
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
- config->Lenval = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrLenvalParserTest, EmptyFields)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- , 3 * 4
- );
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
- config->Lenval = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrLenvalParserTest, HugeLength)
-{
- TString input = TString(
- "\xFF\xFF\xFF\xFF"
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- , 3 * 4
- );
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
- config->Lenval = true;
-
- EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TYamrLenvalParserTest, SimpleEndOfMessage)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key1"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value1"));
- EXPECT_CALL(Mock, OnEndMap());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginAttributes());
- EXPECT_CALL(Mock, OnKeyedItem("table_index"));
- EXPECT_CALL(Mock, OnInt64Scalar(1));
- EXPECT_CALL(Mock, OnEndAttributes());
- EXPECT_CALL(Mock, OnEntity());
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("key2"));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar("value2"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- auto config = New<TYamrFormatConfig>();
- config->Lenval = true;
- config->EnableEom = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrLenvalParserTest, EmptyFieldsWithEOM)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("value"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00"
- , 3 * 4 + 12
- );
-
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = true;
- config->Lenval = true;
- config->EnableEom = true;
-
- ParseYamr(input, &Mock, config);
-}
-
-TEST(TYamrParserTest, IncorrectPlaceOfEOM)
-{
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = false;
- config->Lenval = true;
- config->EnableEom = true;
-
- TString input1 = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- TString input2 = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
-
- "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
-
- "\x06\x00\x00\x00" "value2"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- EXPECT_THROW(ParseYamr(input1, GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr(input2, GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TYamrParserTest, IncorrectEOM)
-{
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = false;
- config->Lenval = true;
- config->EnableEom = true;
-
- // Garbage after EOM marker
- TString input1 = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- // Row count mismatch
- TString input2 = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\xfb\xff\xff\xff" "\x03\x00\x00\x00\x00\x00\x00\x00"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- // Missing EOM marker
- TString input3 = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- , 2 * (2 * 4 + 4 + 6) + 8 // all i32 + lengths of keys
- );
-
- // Missing EOM marker with empty fields
- TString input4 = TString(
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- "\x00\x00\x00\x00"
- , 3 * 4
- );
-
- EXPECT_THROW(ParseYamr(input1, GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr(input2, GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr(input3, GetNullYsonConsumer(), config), std::exception);
- EXPECT_THROW(ParseYamr(input4, GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TYamrParserTest, UnsupportedEOMInTextMode)
-{
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = false;
- config->Lenval = false;
- config->EnableEom = true;
-
- TString input = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
-}
-
-TEST(TYamrParserTest, UnexpectedEOM)
-{
- auto config = New<TYamrFormatConfig>();
- config->HasSubkey = false;
- config->Lenval = true;
- config->EnableEom = false;
-
- TString input = TString(
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\xff\xff\xff\xff" "\x01\x00\x00\x00"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\xfb\xff\xff\xff" "\x02\x00\x00\x00\x00\x00\x00\x00"
- , 2 * (2 * 4 + 4 + 6) + 8 + 12 // all i32 + lengths of keys
- );
-
- EXPECT_THROW(ParseYamr(input, GetNullYsonConsumer(), config), std::exception);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/yamr_writer_ut.cpp b/yt/yt/client/unittests/yamr_writer_ut.cpp
deleted file mode 100644
index 747f542807..0000000000
--- a/yt/yt/client/unittests/yamr_writer_ut.cpp
+++ /dev/null
@@ -1,644 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/table_client/unversioned_row.h>
-#include <yt/yt/client/table_client/name_table.h>
-
-#include <yt/yt/client/formats/yamr_writer.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-
-namespace NYT::NFormats {
-namespace {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NConcurrency;
-using namespace NTableClient;
-
-class TSchemalessWriterForYamrTest
- : public ::testing::Test
-{
-protected:
- TNameTablePtr NameTable_;
- int KeyId_;
- int SubkeyId_;
- int ValueId_;
- int TableIndexId_;
- int RangeIndexId_;
- int RowIndexId_;
-
- TYamrFormatConfigPtr Config_;
-
- IUnversionedRowsetWriterPtr Writer_;
-
- TStringStream OutputStream_;
-
- TSchemalessWriterForYamrTest() {
- NameTable_ = New<TNameTable>();
- KeyId_ = NameTable_->RegisterName("key");
- SubkeyId_ = NameTable_->RegisterName("subkey");
- ValueId_ = NameTable_->RegisterName("value");
- TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
- RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName);
- RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
-
- Config_ = New<TYamrFormatConfig>();
- }
-
- void CreateStandardWriter(TControlAttributesConfigPtr controlAttributes = New<TControlAttributesConfig>())
- {
- Writer_ = CreateSchemalessWriterForYamr(
- Config_,
- NameTable_,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
- false, /* enableContextSaving */
- controlAttributes,
- 0 /* keyColumnCount */);
- }
-};
-
-TEST_F(TSchemalessWriterForYamrTest, Simple)
-{
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
-
- // Ignore system columns.
- row1.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
-
- // Note that key and value follow not in order.
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "key1\tvalue1\n"
- "key2\tvalue2\n";
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SimpleWithSubkey)
-{
- Config_->HasSubkey = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("subkey2", SubkeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "key1\tsubkey1\tvalue1\n"
- "key2\tsubkey2\tvalue2\n";
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SubkeyCouldBeSkipped)
-{
- Config_->HasSubkey = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("key", KeyId_));
- row.AddValue(MakeUnversionedStringValue("value", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = "key\t\tvalue\n";
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SubkeyCouldBeNull)
-{
- Config_->HasSubkey = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("key", KeyId_));
- row.AddValue(MakeUnversionedSentinelValue(EValueType::Null, SubkeyId_));
- row.AddValue(MakeUnversionedStringValue("value", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = "key\t\tvalue\n";
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, NonNullTerminatedStrings)
-{
- Config_->HasSubkey = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- const char* longString = "trashkeytrashsubkeytrashvalue";
- row.AddValue(MakeUnversionedStringValue(TStringBuf(longString + 5, 3), KeyId_));
- row.AddValue(MakeUnversionedStringValue(TStringBuf(longString + 13, 6), SubkeyId_));
- row.AddValue(MakeUnversionedStringValue(TStringBuf(longString + 24, 5), ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = "key\tsubkey\tvalue\n";
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SkippedKey)
-{
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("value", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_FALSE(Writer_->Write(rows));
-
- EXPECT_THROW(Writer_->Close()
- .Get()
- .ThrowOnError(), std::exception);
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SkippedValue)
-{
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("key", KeyId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_FALSE(Writer_->Write(rows));
-
- EXPECT_THROW(Writer_->Close()
- .Get()
- .ThrowOnError(), std::exception);
-}
-
-TEST_F(TSchemalessWriterForYamrTest, NotStringType) {
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("key", KeyId_));
- row.AddValue(MakeUnversionedInt64Value(42, ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_FALSE(Writer_->Write(rows));
-
- EXPECT_THROW(Writer_->Close()
- .Get()
- .ThrowOnError(), std::exception);
-}
-
-TEST_F(TSchemalessWriterForYamrTest, ExtraItem)
-{
- int trashId = NameTable_->RegisterName("trash");
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("key", KeyId_));
- row.AddValue(MakeUnversionedStringValue("value", ValueId_));
- // This value will be ignored.
- row.AddValue(MakeUnversionedStringValue("trash", trashId));
- // This value will also be ignored because Config_->HasSubkey is off,
- // despite the fact it has non-string type.
- row.AddValue(MakeUnversionedInt64Value(42, SubkeyId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = "key\tvalue\n";
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, Escaping)
-{
- Config_->HasSubkey = true;
- Config_->EnableEscaping = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row;
- row.AddValue(MakeUnversionedStringValue("\n", KeyId_));
- row.AddValue(MakeUnversionedStringValue("\t", SubkeyId_));
- row.AddValue(MakeUnversionedStringValue("\n", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = "\\n\t\\t\t\\n\n";
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SimpleWithTableIndex)
-{
- Config_->EnableTableIndex = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableTableIndex = true;
- CreateStandardWriter(controlAttributes);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
- row3.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
-
- rows = { row3.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "42\n"
- "key1\tvalue1\n"
- "key2\tvalue2\n"
- "23\n"
- "key3\tvalue3\n";
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, SimpleWithRowIndexAndTableIndex)
-{
- Config_->EnableTableIndex = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableTableIndex = true;
- controlAttributes->EnableRowIndex = true;
- CreateStandardWriter(controlAttributes);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(0, RowIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(0, RangeIndexId_));
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
- row3.AddValue(MakeUnversionedInt64Value(5, RowIndexId_));
- row3.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
- rows = { row3.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row4;
- row4.AddValue(MakeUnversionedStringValue("key4", KeyId_));
- row4.AddValue(MakeUnversionedStringValue("value4", ValueId_));
- row4.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
- row4.AddValue(MakeUnversionedInt64Value(10, RowIndexId_));
- row4.AddValue(MakeUnversionedInt64Value(2, RangeIndexId_));
- rows = { row4.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output =
- "42\n0\n"
- "key1\tvalue1\n"
- "key2\tvalue2\n"
- "42\n5\n"
- "key3\tvalue3\n"
- "23\n10\n"
- "key4\tvalue4\n";
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, Lenval)
-{
- Config_->HasSubkey = true;
- Config_->Lenval = true;
- CreateStandardWriter();
-
- // Note that order in both rows is unusual.
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedStringValue("subkey2", SubkeyId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = TString(
- "\x04\x00\x00\x00" "key1"
- "\x07\x00\x00\x00" "subkey1"
- "\x06\x00\x00\x00" "value1"
-
- "\x04\x00\x00\x00" "key2"
- "\x07\x00\x00\x00" "subkey2"
- "\x06\x00\x00\x00" "value2"
- , 2 * (3 * 4 + 4 + 6 + 7) // all i32 + lengths of keys
- );
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, LenvalWithEmptyFields)
-{
- Config_->HasSubkey = true;
- Config_->Lenval = true;
- CreateStandardWriter();
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("", SubkeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("subkey3", SubkeyId_));
- row3.AddValue(MakeUnversionedStringValue("", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow(), row3.GetRow() };
-
- EXPECT_EQ(true, Writer_->Write(rows));
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = TString(
- "\x00\x00\x00\x00" ""
- "\x07\x00\x00\x00" "subkey1"
- "\x06\x00\x00\x00" "value1"
-
- "\x04\x00\x00\x00" "key2"
- "\x00\x00\x00\x00" ""
- "\x06\x00\x00\x00" "value2"
-
- "\x04\x00\x00\x00" "key3"
- "\x07\x00\x00\x00" "subkey3"
- "\x00\x00\x00\x00" ""
-
- , 9 * 4 + (7 + 6) + (4 + 6) + (4 + 7) // all i32 + lengths of keys
- );
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, LenvalWithKeySwitch)
-{
- Config_->HasSubkey = true;
- Config_->Lenval = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableKeySwitch = true;
-
- Writer_ = CreateSchemalessWriterForYamr(
- Config_,
- NameTable_,
- CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
- false, /* enableContextSaving */
- controlAttributes,
- 1 /* keyColumnCount */);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("subkey1", SubkeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("subkey21", SubkeyId_));
- row2.AddValue(MakeUnversionedStringValue("value21", ValueId_));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("subkey22", SubkeyId_));
- row3.AddValue(MakeUnversionedStringValue("value22", ValueId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow(), row3.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row4;
- row4.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row4.AddValue(MakeUnversionedStringValue("subkey3", SubkeyId_));
- row4.AddValue(MakeUnversionedStringValue("value3", ValueId_));
-
- rows = { row4.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output = TString(
- "\x04\x00\x00\x00" "key1"
- "\x07\x00\x00\x00" "subkey1"
- "\x06\x00\x00\x00" "value1"
-
- "\xfe\xff\xff\xff" // key switch
-
- "\x04\x00\x00\x00" "key2"
- "\x08\x00\x00\x00" "subkey21"
- "\x07\x00\x00\x00" "value21"
-
- "\x04\x00\x00\x00" "key2"
- "\x08\x00\x00\x00" "subkey22"
- "\x07\x00\x00\x00" "value22"
-
- "\xfe\xff\xff\xff"
-
- "\x04\x00\x00\x00" "key3"
- "\x07\x00\x00\x00" "subkey3"
- "\x06\x00\x00\x00" "value3"
-
- , 14 * 4 + (4 + 7 + 6) + (4 + 8 + 7) + (4 + 8 + 7) + (4 + 7 + 6) // all i32 + lengths of keys
- );
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, LenvalWithTableIndex)
-{
- Config_->EnableTableIndex = true;
- Config_->Lenval = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableTableIndex = true;
- CreateStandardWriter(controlAttributes);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
- row3.AddValue(MakeUnversionedInt64Value(23, TableIndexId_));
-
- rows = { row3.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output(
- "\xff\xff\xff\xff" "\x2a\x00\x00\x00" // 42
-
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\xff\xff\xff\xff" "\x17\x00\x00\x00" // 23
-
- "\x04\x00\x00\x00" "key3"
- "\x06\x00\x00\x00" "value3"
- , 10 * 4 + 3 * (4 + 6));
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-TEST_F(TSchemalessWriterForYamrTest, LenvalWithRangeAndRowIndex)
-{
- Config_->Lenval = true;
-
- auto controlAttributes = New<TControlAttributesConfig>();
- controlAttributes->EnableRowIndex = true;
- controlAttributes->EnableRangeIndex = true;
- CreateStandardWriter(controlAttributes);
-
- TUnversionedRowBuilder row1;
- row1.AddValue(MakeUnversionedStringValue("key1", KeyId_));
- row1.AddValue(MakeUnversionedStringValue("value1", ValueId_));
- row1.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
- row1.AddValue(MakeUnversionedInt64Value(23, RowIndexId_));
-
- TUnversionedRowBuilder row2;
- row2.AddValue(MakeUnversionedStringValue("key2", KeyId_));
- row2.AddValue(MakeUnversionedStringValue("value2", ValueId_));
- row2.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
- row2.AddValue(MakeUnversionedInt64Value(24, RowIndexId_));
-
- std::vector<TUnversionedRow> rows = { row1.GetRow(), row2.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- TUnversionedRowBuilder row3;
- row3.AddValue(MakeUnversionedStringValue("key3", KeyId_));
- row3.AddValue(MakeUnversionedStringValue("value3", ValueId_));
- row3.AddValue(MakeUnversionedInt64Value(42, RangeIndexId_));
- row3.AddValue(MakeUnversionedInt64Value(25, RowIndexId_));
-
- rows = { row3.GetRow() };
- EXPECT_EQ(true, Writer_->Write(rows));
-
- Writer_->Close()
- .Get()
- .ThrowOnError();
-
- TString output(
- "\xfd\xff\xff\xff" "\x2a\x00\x00\x00" // 42
- "\xfc\xff\xff\xff" "\x17\x00\x00\x00\x00\x00\x00\x00" // 23
-
- "\x04\x00\x00\x00" "key1"
- "\x06\x00\x00\x00" "value1"
-
- "\x04\x00\x00\x00" "key2"
- "\x06\x00\x00\x00" "value2"
-
- "\x04\x00\x00\x00" "key3"
- "\x06\x00\x00\x00" "value3"
- , 11 * 4 + 3 * (4 + 6));
-
- EXPECT_EQ(output, OutputStream_.Str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/yamred_dsv_parser_ut.cpp b/yt/yt/client/unittests/yamred_dsv_parser_ut.cpp
deleted file mode 100644
index d29c9a4df6..0000000000
--- a/yt/yt/client/unittests/yamred_dsv_parser_ut.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/core/test_framework/yson_consumer_mock.h>
-
-#include <yt/yt/client/formats/yamred_dsv_parser.h>
-
-namespace NYT::NFormats {
-namespace {
-
-using namespace NYson;
-
-using ::testing::InSequence;
-using ::testing::StrictMock;
-using ::testing::NiceMock;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamredDsvParserTest, Simple)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key_a"));
- EXPECT_CALL(Mock, OnStringScalar("1"));
- EXPECT_CALL(Mock, OnKeyedItem("key_b"));
- EXPECT_CALL(Mock, OnStringScalar("2"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey_x"));
- EXPECT_CALL(Mock, OnStringScalar("3"));
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("5"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar("6"));
- EXPECT_CALL(Mock, OnEndMap());
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key_a"));
- EXPECT_CALL(Mock, OnStringScalar("7"));
- EXPECT_CALL(Mock, OnKeyedItem("key_b"));
- EXPECT_CALL(Mock, OnStringScalar("8"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey_x"));
- EXPECT_CALL(Mock, OnStringScalar("9"));
- EXPECT_CALL(Mock, OnKeyedItem("b"));
- EXPECT_CALL(Mock, OnStringScalar("max\tignat"));
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("100"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input =
- "1 2\t3\ta=5\tb=6\n"
- "7 8\t9\tb=max\\tignat\ta=100\n";
-
- auto config = New<TYamredDsvFormatConfig>();
- config->HasSubkey = true;
- config->KeyColumnNames.push_back("key_a");
- config->KeyColumnNames.push_back("key_b");
- config->SubkeyColumnNames.push_back("subkey_x");
-
- ParseYamredDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamredDsvParserTest, EmptyField)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar(""));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("0 1"));
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("b"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "\t0 1\ta=b\n";
-
- auto config = New<TYamredDsvFormatConfig>();
- config->HasSubkey = true;
- config->KeyColumnNames.push_back("key");
- config->SubkeyColumnNames.push_back("subkey");
-
- ParseYamredDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamredDsvParserTest, Escaping)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("\t"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("0\n1"));
- EXPECT_CALL(Mock, OnKeyedItem("a"));
- EXPECT_CALL(Mock, OnStringScalar("\tb\nc"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = "\\t\t0\\n1\ta=\\tb\\nc\n";
-
- auto config = New<TYamredDsvFormatConfig>();
- config->HasSubkey = true;
- config->EnableEscaping = true;
- config->KeyColumnNames.push_back("key");
- config->SubkeyColumnNames.push_back("subkey");
-
- ParseYamredDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-TEST(TYamredDsvParserTest, Lenval)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("a"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("bc"));
- EXPECT_CALL(Mock, OnKeyedItem("d"));
- EXPECT_CALL(Mock, OnStringScalar("e"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x01\x00\x00\x00" "a"
- "\x02\x00\x00\x00" "bc"
- "\x03\x00\x00\x00" "d=e"
- , 3 * 4 + 1 + 2 + 3
- );
-
- auto config = New<TYamredDsvFormatConfig>();
- config->Lenval = true;
- config->HasSubkey = true;
- config->KeyColumnNames.push_back("key");
- config->SubkeyColumnNames.push_back("subkey");
-
- ParseYamredDsv(input, &Mock, config);
-}
-
-TEST(TYamredDsvParserTest, EOM)
-{
- StrictMock<TMockYsonConsumer> Mock;
- InSequence dummy;
-
- EXPECT_CALL(Mock, OnListItem());
- EXPECT_CALL(Mock, OnBeginMap());
- EXPECT_CALL(Mock, OnKeyedItem("key"));
- EXPECT_CALL(Mock, OnStringScalar("a"));
- EXPECT_CALL(Mock, OnKeyedItem("subkey"));
- EXPECT_CALL(Mock, OnStringScalar("bc"));
- EXPECT_CALL(Mock, OnKeyedItem("d"));
- EXPECT_CALL(Mock, OnStringScalar("e"));
- EXPECT_CALL(Mock, OnEndMap());
-
- TString input = TString(
- "\x01\x00\x00\x00" "a"
- "\x02\x00\x00\x00" "bc"
- "\x03\x00\x00\x00" "d=e"
- "\xfb\xff\xff\xff" "\x01\x00\x00\x00\x00\x00\x00\x00"
- , 3 * 4 + 1 + 2 + 3 + 12
- );
-
- auto config = New<TYamredDsvFormatConfig>();
- config->Lenval = true;
- config->EnableEom = true;
- config->HasSubkey = true;
- config->KeyColumnNames.push_back("key");
- config->SubkeyColumnNames.push_back("subkey");
-
- ParseYamredDsv(input, &Mock, config);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/yamred_dsv_writer_ut.cpp b/yt/yt/client/unittests/yamred_dsv_writer_ut.cpp
deleted file mode 100644
index a45a895892..0000000000
--- a/yt/yt/client/unittests/yamred_dsv_writer_ut.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/table_client/unversioned_row.h>
-#include <yt/yt/client/table_client/name_table.h>
-
-#include <yt/yt/client/formats/yamred_dsv_writer.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-
-#include <util/string/vector.h>
-
-#include <cstdio>
-
-
-namespace NYT::NFormats {
-namespace {
-
-using VectorStrok = TVector<TString>;
-
-////////////////////////////////////////////////////////////////////////////////
-
-using namespace NYTree;
-using namespace NYson;
-using namespace NConcurrency;
-using namespace NTableClient;
-
-//! Fixture for the YAMRed DSV schemaless writer tests.
-/*!
- *  Registers a fixed set of column names in a fresh name table, owns the output stream
- *  the writer writes into and provides helpers that compare the produced text with the
- *  expected one while ignoring the order of entries inside the value column.
- */
-class TSchemalessWriterForYamredDsvTest
-    : public ::testing::Test
-{
-protected:
-    TNameTablePtr NameTable_;
-    TYamredDsvFormatConfigPtr Config_;
-    IUnversionedRowsetWriterPtr Writer_;
-
-    TStringStream OutputStream_;
-
-    int KeyAId_;
-    int KeyBId_;
-    int KeyCId_;
-    int ValueXId_;
-    int ValueYId_;
-    int TableIndexId_;
-    int RangeIndexId_;
-    int RowIndexId_;
-
-    TSchemalessWriterForYamredDsvTest()
-    {
-        NameTable_ = New<TNameTable>();
-        KeyAId_ = NameTable_->RegisterName("key_a");
-        KeyBId_ = NameTable_->RegisterName("key_b");
-        KeyCId_ = NameTable_->RegisterName("key_c");
-        ValueXId_ = NameTable_->RegisterName("value_x");
-        ValueYId_ = NameTable_->RegisterName("value_y");
-        TableIndexId_ = NameTable_->RegisterName(TableIndexColumnName);
-        RowIndexId_ = NameTable_->RegisterName(RowIndexColumnName);
-        RangeIndexId_ = NameTable_->RegisterName(RangeIndexColumnName);
-        Config_ = New<TYamredDsvFormatConfig>();
-    }
-
-    //! Creates the writer under test on top of #OutputStream_ using the current #Config_.
-    void CreateStandardWriter(TControlAttributesConfigPtr controlAttributes = New<TControlAttributesConfig>())
-    {
-        Writer_ = CreateSchemalessWriterForYamredDsv(
-            Config_,
-            NameTable_,
-            CreateAsyncAdapter(static_cast<IOutputStream*>(&OutputStream_)),
-            false, /* enableContextSaving */
-            controlAttributes,
-            0 /* keyColumnCount */);
-    }
-
-    //! Splits #text by a single-character #separator keeping empty tokens.
-    static VectorStrok SplitBySeparator(const TString& text, char separator)
-    {
-        char delimiter[2] = {separator, 0};
-        return SplitString(text, delimiter, 0 /* maxFields */, KEEP_EMPTY_TOKENS);
-    }
-
-    // Splits output into key and sorted vector of values that are entries of the last YAMR column.
-    // Returns true if success (there are >= 2 values after splitting by field separator), otherwise false.
-    bool ExtractKeyValue(TString output, TString& key, VectorStrok& value, char fieldSeparator = '\t')
-    {
-        value = SplitBySeparator(output, fieldSeparator);
-        // We should at least have key and the rest of values.
-        if (value.size() < 2) {
-            return false;
-        }
-        key = value[0];
-        value.erase(value.begin());
-        std::sort(value.begin(), value.end());
-        return true;
-    }
-
-    // The same function as previous, version with subkey.
-    // Returns true if there are >= 3 fields (key, subkey and at least one value), otherwise false.
-    bool ExtractKeySubkeyValue(TString output, TString& key, TString& subkey, VectorStrok& value, char fieldSeparator = '\t')
-    {
-        value = SplitBySeparator(output, fieldSeparator);
-        // We should at least have key, subkey and the rest of values.
-        if (value.size() < 3) {
-            return false;
-        }
-        key = value[0];
-        subkey = value[1];
-        // Drop only the key and the subkey; erasing up to end() would discard every value
-        // and make the subsequent value comparison vacuous.
-        value.erase(value.begin(), value.begin() + 2);
-        std::sort(value.begin(), value.end());
-        return true;
-    }
-
-    // Compares output and expected output ignoring the order of entries in YAMR value column.
-    void CompareKeyValue(TString output, TString expected, char recordSeparator = '\n', char fieldSeparator = '\t')
-    {
-        VectorStrok outputRows = SplitBySeparator(output, recordSeparator);
-        VectorStrok expectedRows = SplitBySeparator(expected, recordSeparator);
-        // Fatal on mismatch: the loop below indexes both vectors with the same index.
-        ASSERT_EQ(outputRows.size(), expectedRows.size());
-        ASSERT_FALSE(expectedRows.empty());
-        // Since there is \n after each row, there will be an extra empty string in both vectors.
-        EXPECT_EQ(outputRows.back(), "");
-        ASSERT_EQ(expectedRows.back(), "");
-        outputRows.pop_back();
-        expectedRows.pop_back();
-
-        TString outputKey;
-        TString expectedKey;
-        VectorStrok outputValue;
-        VectorStrok expectedValue;
-        for (size_t rowIndex = 0; rowIndex < outputRows.size(); ++rowIndex) {
-            EXPECT_TRUE(ExtractKeyValue(outputRows[rowIndex], outputKey, outputValue, fieldSeparator));
-            ASSERT_TRUE(ExtractKeyValue(expectedRows[rowIndex], expectedKey, expectedValue, fieldSeparator));
-            EXPECT_EQ(outputKey, expectedKey);
-            EXPECT_EQ(outputValue, expectedValue);
-        }
-    }
-
-    // The same function as previous, version with subkey.
-    void CompareKeySubkeyValue(TString output, TString expected, char recordSeparator = '\n', char fieldSeparator = '\t')
-    {
-        VectorStrok outputRows = SplitBySeparator(output, recordSeparator);
-        VectorStrok expectedRows = SplitBySeparator(expected, recordSeparator);
-        // Fatal on mismatch: the loop below indexes both vectors with the same index.
-        ASSERT_EQ(outputRows.size(), expectedRows.size());
-        ASSERT_FALSE(expectedRows.empty());
-        // Since there is \n after each row, there will be an extra empty string in both vectors.
-        EXPECT_EQ(outputRows.back(), "");
-        ASSERT_EQ(expectedRows.back(), "");
-        outputRows.pop_back();
-        expectedRows.pop_back();
-
-        TString outputKey;
-        TString expectedKey;
-        TString outputSubkey;
-        TString expectedSubkey;
-        VectorStrok outputValue;
-        VectorStrok expectedValue;
-        for (size_t rowIndex = 0; rowIndex < outputRows.size(); ++rowIndex) {
-            EXPECT_TRUE(ExtractKeySubkeyValue(outputRows[rowIndex], outputKey, outputSubkey, outputValue, fieldSeparator));
-            ASSERT_TRUE(ExtractKeySubkeyValue(expectedRows[rowIndex], expectedKey, expectedSubkey, expectedValue, fieldSeparator));
-            EXPECT_EQ(outputKey, expectedKey);
-            EXPECT_EQ(outputSubkey, expectedSubkey);
-            EXPECT_EQ(outputValue, expectedValue);
-        }
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Writes two rows with a single key column and checks the produced key/value text;
-// null values and system columns are not expected to appear in the output.
-TEST_F(TSchemalessWriterForYamredDsvTest, Simple)
-{
-    Config_->KeyColumnNames.emplace_back("key_a");
-    CreateStandardWriter();
-
-    TUnversionedRowBuilder row1;
-    row1.AddValue(MakeUnversionedStringValue("a1", KeyAId_));
-    row1.AddValue(MakeUnversionedStringValue("x", ValueXId_));
-    row1.AddValue(MakeUnversionedSentinelValue(EValueType::Null, ValueYId_));
-
-    // Ignore system columns.
-    row1.AddValue(MakeUnversionedInt64Value(2, TableIndexId_));
-    row1.AddValue(MakeUnversionedInt64Value(42, RowIndexId_));
-    row1.AddValue(MakeUnversionedInt64Value(1, RangeIndexId_));
-
-    TUnversionedRowBuilder row2;
-    row2.AddValue(MakeUnversionedStringValue("a2", KeyAId_));
-    row2.AddValue(MakeUnversionedStringValue("y", ValueYId_));
-    row2.AddValue(MakeUnversionedStringValue("b", KeyBId_));
-
-    std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    TString expectedOutput =
-        "a1\tvalue_x=x\n"
-        "a2\tvalue_y=y\tkey_b=b\n";
-
-    TString output = OutputStream_.Str();
-
-    // The helper is declared as (output, expected); passing them in that order keeps
-    // its fatal/non-fatal checks attached to the right side.
-    CompareKeyValue(output, expectedOutput);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Writes two rows with a two-column key and a one-column subkey and no other values;
-// the value field of every produced record is expected to be empty.
-TEST_F(TSchemalessWriterForYamredDsvTest, SimpleWithSubkey)
-{
-    Config_->HasSubkey = true;
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->KeyColumnNames.emplace_back("key_b");
-    Config_->SubkeyColumnNames.emplace_back("key_c");
-    CreateStandardWriter();
-
-    TUnversionedRowBuilder row1;
-    row1.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-    row1.AddValue(MakeUnversionedStringValue("b1", KeyBId_));
-    row1.AddValue(MakeUnversionedStringValue("c", KeyCId_));
-
-    TUnversionedRowBuilder row2;
-    row2.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-    row2.AddValue(MakeUnversionedStringValue("b2", KeyBId_));
-    row2.AddValue(MakeUnversionedStringValue("c", KeyCId_));
-
-    std::vector<TUnversionedRow> rows = {row1.GetRow(), row2.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    TString expectedOutput =
-        "a b1\tc\t\n"
-        "a b2\tc\t\n";
-
-    TString output = OutputStream_.Str();
-
-    // The helper is declared as (output, expected); passing them in that order keeps
-    // its fatal/non-fatal checks attached to the right side.
-    CompareKeySubkeyValue(output, expectedOutput);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Lenval mode with subkey and all control attributes enabled: the output is compared
-// byte-for-byte with the expected binary stream.
-TEST_F(TSchemalessWriterForYamredDsvTest, Lenval)
-{
-    Config_->Lenval = true;
-    Config_->HasSubkey = true;
-    Config_->EnableTableIndex = true;
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->KeyColumnNames.emplace_back("key_b");
-    Config_->SubkeyColumnNames.emplace_back("key_c");
-
-    auto attributes = New<TControlAttributesConfig>();
-    attributes->EnableTableIndex = true;
-    attributes->EnableRowIndex = true;
-    attributes->EnableRangeIndex = true;
-    CreateStandardWriter(attributes);
-
-    // First row: key, subkey, one regular value and the control columns.
-    TUnversionedRowBuilder firstRow;
-    firstRow.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-    firstRow.AddValue(MakeUnversionedStringValue("b1", KeyBId_));
-    firstRow.AddValue(MakeUnversionedStringValue("c", KeyCId_));
-    firstRow.AddValue(MakeUnversionedStringValue("x", ValueXId_));
-
-    firstRow.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-    firstRow.AddValue(MakeUnversionedInt64Value(23, RangeIndexId_));
-    firstRow.AddValue(MakeUnversionedInt64Value(17, RowIndexId_));
-
-    // Second row: same table and range, next row index, no regular values.
-    TUnversionedRowBuilder secondRow;
-    secondRow.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-    secondRow.AddValue(MakeUnversionedStringValue("b2", KeyBId_));
-    secondRow.AddValue(MakeUnversionedStringValue("c", KeyCId_));
-
-    secondRow.AddValue(MakeUnversionedInt64Value(42, TableIndexId_));
-    secondRow.AddValue(MakeUnversionedInt64Value(23, RangeIndexId_));
-    secondRow.AddValue(MakeUnversionedInt64Value(18, RowIndexId_));
-
-    std::vector<TUnversionedRow> rows{firstRow.GetRow(), secondRow.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    // The control attributes are expected once, before the first record.
-    TString expectedOutput = TString(
-        "\xff\xff\xff\xff" "\x2a\x00\x00\x00" // Table index.
-        "\xfd\xff\xff\xff" "\x17\x00\x00\x00" // Range index.
-        "\xfc\xff\xff\xff" "\x11\x00\x00\x00\x00\x00\x00\x00" // Row index.
-
-        "\x04\x00\x00\x00" "a b1"
-        "\x01\x00\x00\x00" "c"
-        "\x09\x00\x00\x00" "value_x=x"
-
-        "\x04\x00\x00\x00" "a b2"
-        "\x01\x00\x00\x00" "c"
-        "\x00\x00\x00\x00" "",
-
-        13 * 4 + 4 + 1 + 9 + 4 + 1 + 0
-    );
-
-    TString actualOutput = OutputStream_.Str();
-    EXPECT_EQ(expectedOutput, actualOutput)
-        << "expected length: " << expectedOutput.length()
-        << ", "
-        << "actual length: " << actualOutput.length();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Tabs, newlines and backslashes in keys, column names and values must come out escaped.
-TEST_F(TSchemalessWriterForYamredDsvTest, Escaping)
-{
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->KeyColumnNames.emplace_back("key_b");
-    // The column name itself contains a tab; it is registered before the writer is created.
-    int escapedColumnId = NameTable_->GetIdOrRegisterName("value\t_t");
-    CreateStandardWriter();
-
-    TUnversionedRowBuilder rowBuilder;
-    rowBuilder.AddValue(MakeUnversionedStringValue("a\n", KeyAId_));
-    rowBuilder.AddValue(MakeUnversionedStringValue("\nb\t", KeyBId_));
-    rowBuilder.AddValue(MakeUnversionedStringValue("\nva\\lue\t", escapedColumnId));
-
-    std::vector<TUnversionedRow> rows{rowBuilder.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    TString actualOutput = OutputStream_.Str();
-    TString expectedOutput = "a\\n \\nb\\t\tvalue\\t_t=\\nva\\\\lue\\t\n";
-
-    EXPECT_EQ(expectedOutput, actualOutput);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// A row lacking one of the configured key columns must be rejected by Write,
-// and closing the writer must then surface an error.
-TEST_F(TSchemalessWriterForYamredDsvTest, SkippedKey)
-{
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->KeyColumnNames.emplace_back("key_b");
-    CreateStandardWriter();
-
-    // Only key_b is present; key_a is missing.
-    TUnversionedRowBuilder incompleteRow;
-    incompleteRow.AddValue(MakeUnversionedStringValue("b", KeyBId_));
-
-    std::vector<TUnversionedRow> rows{incompleteRow.GetRow()};
-
-    EXPECT_FALSE(Writer_->Write(rows));
-
-    EXPECT_THROW(
-        Writer_->Close()
-            .Get()
-            .ThrowOnError(),
-        std::exception);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// A row lacking the configured subkey column must be rejected by Write,
-// and closing the writer must then surface an error.
-TEST_F(TSchemalessWriterForYamredDsvTest, SkippedSubkey)
-{
-    Config_->HasSubkey = true;
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->SubkeyColumnNames.emplace_back("key_c");
-    CreateStandardWriter();
-
-    // The key is present, the subkey (key_c) is not.
-    TUnversionedRowBuilder incompleteRow;
-    incompleteRow.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-
-    std::vector<TUnversionedRow> rows{incompleteRow.GetRow()};
-
-    EXPECT_FALSE(Writer_->Write(rows));
-
-    EXPECT_THROW(
-        Writer_->Close()
-            .Get()
-            .ThrowOnError(),
-        std::exception);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Integer, boolean and double values are expected to be rendered as text
-// in the key, subkey and value fields.
-TEST_F(TSchemalessWriterForYamredDsvTest, NonStringValues)
-{
-    Config_->HasSubkey = true;
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->SubkeyColumnNames.emplace_back("key_c");
-    CreateStandardWriter();
-
-    TUnversionedRowBuilder mixedRow;
-    mixedRow.AddValue(MakeUnversionedInt64Value(-42, KeyAId_));
-    mixedRow.AddValue(MakeUnversionedUint64Value(18, KeyCId_));
-    mixedRow.AddValue(MakeUnversionedBooleanValue(true, KeyBId_));
-    mixedRow.AddValue(MakeUnversionedDoubleValue(3.14, ValueXId_));
-    mixedRow.AddValue(MakeUnversionedStringValue("yt", ValueYId_));
-
-    std::vector<TUnversionedRow> rows{mixedRow.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    TString actualOutput = OutputStream_.Str();
-    TString expectedOutput = "-42\t18\tkey_b=true\tvalue_x=3.14\tvalue_y=yt\n";
-
-    EXPECT_EQ(expectedOutput, actualOutput);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// With HasSubkey left at its default, a column listed in SubkeyColumnNames is expected
-// to be absent from the output altogether.
-TEST_F(TSchemalessWriterForYamredDsvTest, ErasingSubkeyColumnsWhenHasSubkeyIsFalse)
-{
-    Config_->KeyColumnNames.emplace_back("key_a");
-    Config_->SubkeyColumnNames.emplace_back("key_b");
-    // Config->HasSubkey = false by default.
-    CreateStandardWriter();
-
-    TUnversionedRowBuilder rowBuilder;
-    rowBuilder.AddValue(MakeUnversionedStringValue("a", KeyAId_));
-    rowBuilder.AddValue(MakeUnversionedStringValue("b", KeyBId_));
-    rowBuilder.AddValue(MakeUnversionedStringValue("c", KeyCId_));
-    rowBuilder.AddValue(MakeUnversionedStringValue("x", ValueXId_));
-
-    std::vector<TUnversionedRow> rows{rowBuilder.GetRow()};
-
-    EXPECT_TRUE(Writer_->Write(rows));
-    Writer_->Close()
-        .Get()
-        .ThrowOnError();
-
-    TString actualOutput = OutputStream_.Str();
-    TString expectedOutput = "a\tkey_c=c\tvalue_x=x\n";
-
-    EXPECT_EQ(expectedOutput, actualOutput);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace
-} // namespace NYT::NFormats
diff --git a/yt/yt/client/unittests/yson_helpers.cpp b/yt/yt/client/unittests/yson_helpers.cpp
deleted file mode 100644
index 669585caf7..0000000000
--- a/yt/yt/client/unittests/yson_helpers.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "yson_helpers.h"
-
-#include <yt/yt/core/ytree/convert.h>
-#include <yt/yt/core/ytree/node.h>
-#include <yt/yt/core/yson/string.h>
-
-namespace NYT {
-
-using namespace NYson;
-using namespace NYTree;
-
-////////////////////////////////////////////////////////////////////////////////
-
-//! Re-serializes #input through a YTree node and returns it rendered in the Pretty YSON format.
-TString CanonizeYson(TStringBuf input)
-{
-    // Round-trip through the tree representation first.
-    auto reserialized = ConvertToYsonString(ConvertToNode(TYsonString(input)));
-
-    TStringStream prettyStream;
-    {
-        // The writer lives in its own scope and is destroyed before the stream is read,
-        // presumably so that any buffered output reaches the stream.
-        TYsonWriter prettyWriter(&prettyStream, NYson::EYsonFormat::Pretty);
-        ParseYsonStringBuffer(reserialized.AsStringBuf(), EYsonType::Node, &prettyWriter);
-    }
-    return prettyStream.Str();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
diff --git a/yt/yt/library/column_converters/boolean_column_converter.cpp b/yt/yt/library/column_converters/boolean_column_converter.cpp
new file mode 100644
index 0000000000..37e27bc56c
--- /dev/null
+++ b/yt/yt/library/column_converters/boolean_column_converter.cpp
@@ -0,0 +1,100 @@
+#include "boolean_column_converter.h"
+
+#include "helpers.h"
+
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+//! Sets the row range of #column and attaches #bitmap as its values with a bit width of one.
+void FillColumnarBooleanValues(
+    TBatchColumn* column,
+    i64 startIndex,
+    i64 valueCount,
+    TRef bitmap)
+{
+    auto& bits = column->Values.emplace();
+    bits.Data = bitmap;
+    bits.BitWidth = 1;
+
+    column->ValueCount = valueCount;
+    column->StartIndex = startIndex;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Converts one boolean column of row-wise values into a batch column
+//! described by a value bitmap and a null bitmap.
+class TBooleanColumnConverter
+    : public IColumnConverter
+{
+public:
+    //! #columnIndex selects the value within each row and becomes the id of the produced column;
+    //! #columnSchema provides the logical type of the produced column.
+    TBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+        : ColumnIndex_(columnIndex)
+        , ColumnSchema_(columnSchema)
+    { }
+
+    //! Builds a single column covering all of #rowsValues.
+    /*!
+     *  The returned structure keeps the flushed bitmaps alongside the column,
+     *  and its root column points at that only column.
+     */
+    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
+    {
+        Reset();
+        AddValues(rowsValues);
+
+        auto column = std::make_shared<TBatchColumn>();
+        auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>();
+        auto valuesRef = Values_.Flush<TConverterTag>();
+
+        FillColumnarBooleanValues(column.get(), 0, rowsValues.size(), valuesRef);
+        FillColumnarNullBitmap(column.get(), 0, rowsValues.size(), nullBitmapRef);
+
+        column->Type = ColumnSchema_.LogicalType();
+        column->Id = ColumnIndex_;
+
+        TOwningColumn owner = {
+            .Column = std::move(column),
+            .NullBitmap = std::move(nullBitmapRef),
+            .ValueBuffer = std::move(valuesRef),
+        };
+
+        return {{owner}, owner.Column.get()};
+    }
+
+
+private:
+    const int ColumnIndex_;
+    const NTableClient::TColumnSchema ColumnSchema_;
+
+    TBitmapOutput Values_;
+    TBitmapOutput NullBitmap_;
+
+    //! Discards whatever the bitmap builders still hold from a previous use.
+    void Reset()
+    {
+        Values_.Flush<TConverterTag>();
+        NullBitmap_.Flush<TConverterTag>();
+    }
+
+    //! Appends one value bit and one null bit per row; nulls are stored as false.
+    void AddValues(const std::vector<TUnversionedRowValues>& rowsValues)
+    {
+        // Iterate by reference: each element is a whole vector of pointers,
+        // and copying it per row is pure overhead.
+        for (const auto& rowValues : rowsValues) {
+            const auto* value = rowValues[ColumnIndex_];
+            bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null;
+            bool data = isNull ? false : value->Data.Boolean;
+            NullBitmap_.Append(isNull);
+            Values_.Append(data);
+        }
+    }
+};
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Factory for the boolean column converter.
+IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+{
+    IColumnConverterPtr converter = std::make_unique<TBooleanColumnConverter>(columnIndex, columnSchema);
+    return converter;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/boolean_column_converter.h b/yt/yt/library/column_converters/boolean_column_converter.h
new file mode 100644
index 0000000000..0495c4a188
--- /dev/null
+++ b/yt/yt/library/column_converters/boolean_column_converter.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "column_converter.h"
+
+#include <yt/yt/client/table_client/public.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a converter that turns a boolean column of row-wise values into a batch column.
+//! #columnIndex selects the value within each row and becomes the id of the produced column;
+//! #columnSchema supplies the logical type of the produced column.
+IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/column_converter.cpp b/yt/yt/library/column_converters/column_converter.cpp
new file mode 100644
index 0000000000..21c9982549
--- /dev/null
+++ b/yt/yt/library/column_converters/column_converter.cpp
@@ -0,0 +1,91 @@
+#include "column_converter.h"
+
+#include "boolean_column_converter.h"
+#include "floating_point_column_converter.h"
+#include "integer_column_converter.h"
+#include "null_column_converter.h"
+#include "string_column_converter.h"
+
+#include <yt/yt/client/table_client/row_base.h>
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+namespace NYT::NColumnConverters {
+
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Picks the converter implementation matching the wire type of #columnSchema.
+/*!
+ *  Double columns are further split by their V1 logical type: Float gets the 32-bit
+ *  converter, everything else the 64-bit one. Min, TheBottom and Max have no converter
+ *  and end up in ThrowUnexpectedValueType.
+ */
+IColumnConverterPtr CreateColumnConvert(
+    const NTableClient::TColumnSchema& columnSchema,
+    int columnIndex)
+{
+    const auto wireType = columnSchema.GetWireType();
+    switch (wireType) {
+        case EValueType::Null:
+            return CreateNullConverter(columnIndex);
+
+        case EValueType::Int64:
+            return CreateInt64ColumnConverter(columnIndex, columnSchema);
+
+        case EValueType::Uint64:
+            return CreateUint64ColumnConverter(columnIndex, columnSchema);
+
+        case EValueType::Double: {
+            bool isFloat = columnSchema.CastToV1Type() == NTableClient::ESimpleLogicalValueType::Float;
+            if (isFloat) {
+                return CreateFloatingPoint32ColumnConverter(columnIndex, columnSchema);
+            }
+            return CreateFloatingPoint64ColumnConverter(columnIndex, columnSchema);
+        }
+
+        case EValueType::Boolean:
+            return CreateBooleanColumnConverter(columnIndex, columnSchema);
+
+        case EValueType::String:
+            return CreateStringConverter(columnIndex, columnSchema);
+
+        case EValueType::Any:
+            return CreateAnyConverter(columnIndex, columnSchema);
+
+        case EValueType::Composite:
+            return CreateCompositeConverter(columnIndex, columnSchema);
+
+        case EValueType::Min:
+        case EValueType::TheBottom:
+        case EValueType::Max:
+            break;
+    }
+    ThrowUnexpectedValueType(wireType);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+//! Converts #rows into one converted column per entry of #columnSchema.
+/*!
+ *  Each row is first spread into a vector indexed by value id (nullptr where the row
+ *  has no value for that id); then every schema column gets its own converter, which is
+ *  run over all rows. The ids of the row values must be less than the schema size.
+ */
+TConvertedColumnRange ConvertRowsToColumns(
+    TRange<TUnversionedRow> rows,
+    const std::vector<TColumnSchema>& columnSchema)
+{
+    std::vector<TUnversionedRowValues> rowsValues;
+    rowsValues.reserve(rows.size());
+
+    for (const auto& row : rows) {
+        TUnversionedRowValues rowValues;
+        rowValues.resize(columnSchema.size(), nullptr);
+        for (const auto* item = row.Begin(); item != row.End(); ++item) {
+            // An id beyond the schema would otherwise write past the end of the vector.
+            YT_VERIFY(item->Id < rowValues.size());
+            rowValues[item->Id] = item;
+        }
+        rowsValues.push_back(std::move(rowValues));
+    }
+
+    TConvertedColumnRange convertedColumnsRange;
+    convertedColumnsRange.reserve(columnSchema.size());
+    for (int columnId = 0; columnId < std::ssize(columnSchema); columnId++) {
+        auto converter = CreateColumnConvert(columnSchema[columnId], columnId);
+        // Hand the result over without copying its owning buffers.
+        convertedColumnsRange.push_back(converter->Convert(rowsValues));
+    }
+    return convertedColumnsRange;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/column_converter.h b/yt/yt/library/column_converters/column_converter.h
new file mode 100644
index 0000000000..64cec2fd44
--- /dev/null
+++ b/yt/yt/library/column_converters/column_converter.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <yt/yt/client/table_client/row_batch.h>
+
+#include <yt/yt/core/misc/bitmap.h>
+
+#include <library/cpp/yt/memory/ref.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Shorthand for the column type of a columnar row batch.
+using TBatchColumn = NTableClient::IUnversionedColumnarRowBatch::TColumn;
+//! Shared-ownership handle to a batch column.
+using TBatchColumnPtr = std::shared_ptr<TBatchColumn>;
+//! Pointers to the values of a single row; ConvertRowsToColumns fills it indexed by value id,
+//! with nullptr for ids the row has no value for.
+using TUnversionedRowValues = std::vector<const NTableClient::TUnversionedValue*>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! A batch column bundled with the shared buffers produced while converting it.
+// NOTE(review): the column is filled from plain TRef views of these buffers, so they
+// presumably have to stay alive as long as the column is used; confirm with the readers.
+struct TOwningColumn
+{
+ //! The column description.
+ TBatchColumnPtr Column;
+ //! Buffer with the null bitmap of the column.
+ TSharedRef NullBitmap;
+ //! Buffer with the values of the column.
+ TSharedRef ValueBuffer;
+ //! Additional buffer; presumably string payload for string-like columns (TODO confirm).
+ TSharedRef StringBuffer;
+};
+
+//! Result of converting one schema column.
+struct TConvertedColumn
+{
+ //! All columns produced by the conversion together with their buffers.
+ std::vector<TOwningColumn> Columns;
+ //! Non-owning pointer to the column representing the converted schema column;
+ //! the converters set it to a column held in #Columns.
+ TBatchColumn* RootColumn;
+};
+
+//! Conversion results for a sequence of schema columns.
+using TConvertedColumnRange = std::vector<TConvertedColumn>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Interface of a per-column converter from row-wise values to a columnar representation.
+struct IColumnConverter
+ : private TNonCopyable
+{
+ virtual ~IColumnConverter() = default;
+ //! Converts the values of this converter's column taken from every row of #rowsValues.
+ virtual TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) = 0;
+};
+
+//! Uniquely owned converter.
+using IColumnConverterPtr = std::unique_ptr<IColumnConverter>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Converts #rows into columnar form, producing one entry per element of #columnSchema
+//! in schema order. Row values are matched to schema columns by value id.
+TConvertedColumnRange ConvertRowsToColumns(
+ TRange<NTableClient::TUnversionedRow> rows,
+ const std::vector<NTableClient::TColumnSchema>& columnSchema);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/floating_point_column_converter.cpp b/yt/yt/library/column_converters/floating_point_column_converter.cpp
new file mode 100644
index 0000000000..bc18a53f14
--- /dev/null
+++ b/yt/yt/library/column_converters/floating_point_column_converter.cpp
@@ -0,0 +1,135 @@
+#include "floating_point_column_converter.h"
+
+#include "helpers.h"
+
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+namespace NYT::NColumnConverters {
+
+using namespace NProto;
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+//! Sets the row range of #column and attaches #data as its values;
+//! the bit width is derived from the size of T.
+template <typename T>
+void FillColumnarFloatingPointValues(
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
+    i64 startIndex,
+    i64 valueCount,
+    TRef data)
+{
+    auto& floatingValues = column->Values.emplace();
+    floatingValues.Data = data;
+    floatingValues.BitWidth = sizeof(T) * 8;
+
+    column->ValueCount = valueCount;
+    column->StartIndex = startIndex;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Packs #values into a freshly allocated buffer: a ui64 element count followed by the raw elements.
+template <typename T>
+TSharedRef SerializeFloatingPointVector(const std::vector<T>& values)
+{
+    const size_t payloadSize = values.size() * sizeof(T);
+    auto buffer = TSharedMutableRef::Allocate<TConverterTag>(payloadSize + sizeof(ui64), {.InitializeStorage = false});
+
+    // Header: the number of elements.
+    const ui64 valueCount = static_cast<ui64>(values.size());
+    std::memcpy(buffer.Begin(), &valueCount, sizeof(valueCount));
+
+    // Payload: the elements themselves, right after the header.
+    std::memcpy(
+        buffer.Begin() + sizeof(ui64),
+        values.data(),
+        payloadSize);
+    return buffer;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Converts one floating-point column of row-wise values into a batch column
+//! holding a dense array of TValue and a null bitmap.
+/*!
+ *  TValue is the stored element type (float or double). Source values are always read
+ *  from the Double member of the unversioned value and narrowed to TValue if needed.
+ */
+template <class TValue, NTableClient::EValueType ValueType>
+class TFloatingPointColumnConverter
+    : public IColumnConverter
+{
+public:
+    static_assert(std::is_floating_point_v<TValue>);
+
+    //! #columnIndex selects the value within each row and becomes the id of the produced column;
+    //! #columnSchema provides the logical type of the produced column.
+    TFloatingPointColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+        : ColumnIndex_(columnIndex)
+        , ColumnSchema_(columnSchema)
+    { }
+
+    //! Builds a single column covering all of #rowsValues.
+    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
+    {
+        Reset();
+        AddValues(rowsValues);
+        auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>();
+        auto valuesRef = TSharedRef::MakeCopy<TConverterTag>(TRef(Values_.data(), sizeof(TValue) * Values_.size()));
+
+        auto column = std::make_shared<TBatchColumn>();
+
+        FillColumnarFloatingPointValues<TValue>(
+            column.get(),
+            0,
+            rowsValues.size(),
+            valuesRef);
+
+        FillColumnarNullBitmap(
+            column.get(),
+            0,
+            rowsValues.size(),
+            nullBitmapRef);
+
+        column->Type = ColumnSchema_.LogicalType();
+        column->Id = ColumnIndex_;
+
+        TOwningColumn owner = {
+            .Column = std::move(column),
+            .NullBitmap = std::move(nullBitmapRef),
+            .ValueBuffer = std::move(valuesRef),
+        };
+
+        return {{owner}, owner.Column.get()};
+    }
+
+private:
+    const int ColumnIndex_;
+    const TColumnSchema ColumnSchema_;
+
+    std::vector<TValue> Values_;
+    TBitmapOutput NullBitmap_;
+
+    //! Drops the values and null bits left from a previous use.
+    void Reset()
+    {
+        Values_.clear();
+        NullBitmap_.Flush<TConverterTag>();
+    }
+
+    //! Appends one value and one null bit per row; nulls are stored as zero.
+    void AddValues(const std::vector<TUnversionedRowValues>& rowsValues)
+    {
+        // Iterate by reference: each element is a whole vector of pointers,
+        // and copying it per row is pure overhead.
+        for (const auto& rowValues : rowsValues) {
+            const auto* value = rowValues[ColumnIndex_];
+            bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null;
+            // The narrowing to float (when TValue is float) is intentional, hence the explicit cast.
+            TValue data = isNull ? TValue(0) : static_cast<TValue>(value->Data.Double);
+            NullBitmap_.Append(isNull);
+            Values_.push_back(data);
+        }
+    }
+};
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Factory for the converter that stores values as 32-bit floats.
+IColumnConverterPtr CreateFloatingPoint32ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+{
+    using TFloatConverter = TFloatingPointColumnConverter<float, NTableClient::EValueType::Double>;
+    return std::make_unique<TFloatConverter>(columnIndex, columnSchema);
+}
+
+//! Factory for the converter that stores values as 64-bit doubles.
+IColumnConverterPtr CreateFloatingPoint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+{
+    using TDoubleConverter = TFloatingPointColumnConverter<double, NTableClient::EValueType::Double>;
+    return std::make_unique<TDoubleConverter>(columnIndex, columnSchema);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/floating_point_column_converter.h b/yt/yt/library/column_converters/floating_point_column_converter.h
new file mode 100644
index 0000000000..3739d4e729
--- /dev/null
+++ b/yt/yt/library/column_converters/floating_point_column_converter.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "column_converter.h"
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a converter that stores the column values as 32-bit floats.
+//! #columnIndex selects the value within each row and becomes the id of the produced column;
+//! #columnSchema supplies the logical type of the produced column.
+IColumnConverterPtr CreateFloatingPoint32ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
+
+//! Same as above, but the column values are stored as 64-bit doubles.
+IColumnConverterPtr CreateFloatingPoint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/helpers.cpp b/yt/yt/library/column_converters/helpers.cpp
new file mode 100644
index 0000000000..cddac06d79
--- /dev/null
+++ b/yt/yt/library/column_converters/helpers.cpp
@@ -0,0 +1,59 @@
+#include "helpers.h"
+
+#include <yt/yt/client/table_client/columnar.h>
+#include <yt/yt/client/table_client/logical_type.h>
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+#include <yt/yt/core/misc/bitmap.h>
+
+namespace NYT::NColumnConverters {
+
+using namespace NProto;
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets the row range of #column and attaches #bitmap as its null bitmap.
+void FillColumnarNullBitmap(
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
+    i64 startIndex,
+    i64 valueCount,
+    TRef bitmap)
+{
+    auto& nulls = column->NullBitmap.emplace();
+    nulls.Data = bitmap;
+
+    column->ValueCount = valueCount;
+    column->StartIndex = startIndex;
+}
+
+
+//! Turns #primaryColumn into a dictionary-encoded column over #dictionaryColumn.
+/*!
+ *  #ids becomes the 32-bit values of the primary column, and its dictionary is marked
+ *  so that a zero id means null. The dictionary column gets #type with the outer
+ *  Optional (if any) stripped.
+ */
+void FillColumnarDictionary(
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* primaryColumn,
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* dictionaryColumn,
+    NTableClient::IUnversionedColumnarRowBatch::TDictionaryId dictionaryId,
+    NTableClient::TLogicalTypePtr type,
+    i64 startIndex,
+    i64 valueCount,
+    TRef ids)
+{
+    primaryColumn->StartIndex = startIndex;
+    primaryColumn->ValueCount = valueCount;
+
+    if (type && type->GetMetatype() == ELogicalMetatype::Optional) {
+        dictionaryColumn->Type = type->AsOptionalTypeRef().GetElement();
+    } else {
+        dictionaryColumn->Type = type;
+    }
+
+    auto& idValues = primaryColumn->Values.emplace();
+    idValues.BitWidth = 32;
+    idValues.Data = ids;
+
+    auto& dictionaryInfo = primaryColumn->Dictionary.emplace();
+    dictionaryInfo.DictionaryId = dictionaryId;
+    dictionaryInfo.ZeroMeansNull = true;
+    dictionaryInfo.ValueColumn = dictionaryColumn;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/helpers.h b/yt/yt/library/column_converters/helpers.h
new file mode 100644
index 0000000000..6957ff13c1
--- /dev/null
+++ b/yt/yt/library/column_converters/helpers.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <yt/yt/client/table_client/row_batch.h>
+#include <yt/yt/client/table_client/schema.h>
+
+#include <yt/yt/core/misc/common.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets the start index and value count of #column and attaches #bitmap as its null bitmap.
+void FillColumnarNullBitmap(
+ NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
+ i64 startIndex,
+ i64 valueCount,
+ TRef bitmap);
+
+//! Makes #primaryColumn a dictionary-encoded column: #ids become its 32-bit values and
+//! #dictionaryColumn becomes the value column of its dictionary (zero id means null).
+//! #dictionaryColumn receives #type with the outer Optional, if any, removed.
+void FillColumnarDictionary(
+ NTableClient::IUnversionedColumnarRowBatch::TColumn* primaryColumn,
+ NTableClient::IUnversionedColumnarRowBatch::TColumn* dictionaryColumn,
+ NTableClient::IUnversionedColumnarRowBatch::TDictionaryId dictionaryId,
+ NTableClient::TLogicalTypePtr type,
+ i64 startIndex,
+ i64 valueCount,
+ TRef ids);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Layout kinds for converted string data.
+// NOTE(review): presumably selects between dictionary-encoded and direct string columns
+// in the string converter; confirm against its implementation.
+DEFINE_ENUM(EUnversionedStringSegmentType,
+ ((DictionaryDense) (0))
+ ((DirectDense) (1))
+);
+
+//! Tag type passed as the template argument to buffer allocations made by the converters.
+struct TConverterTag
+{};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/integer_column_converter.cpp b/yt/yt/library/column_converters/integer_column_converter.cpp
new file mode 100644
index 0000000000..862c23e5b7
--- /dev/null
+++ b/yt/yt/library/column_converters/integer_column_converter.cpp
@@ -0,0 +1,175 @@
+#include "integer_column_converter.h"
+
+#include "helpers.h"
+
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+#include <library/cpp/yt/coding/zig_zag.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+//! Signed values are stored in their zig-zag representation.
+ui64 EncodeValue(i64 value)
+{
+    const ui64 encoded = ZigZagEncode64(value);
+    return encoded;
+}
+
+//! Unsigned values are stored as is.
+ui64 EncodeValue(ui64 value)
+{
+    const ui64 stored = value;
+    return stored;
+}
+
+//! Signed flavor: reads the Data.Int64 field of |value|.
+template <class TValue>
+std::enable_if_t<std::is_signed_v<TValue>, TValue>
+GetValue(const NTableClient::TUnversionedValue& value)
+{
+    const auto& payload = value.Data;
+    return payload.Int64;
+}
+
+//! Unsigned flavor: reads the Data.Uint64 field of |value|.
+template <class TValue>
+std::enable_if_t<std::is_unsigned_v<TValue>, TValue>
+GetValue(const NTableClient::TUnversionedValue& value)
+{
+    const auto& payload = value.Data;
+    return payload.Uint64;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Describes |column| as a run of 64-bit integers stored relative to |baseValue|.
+void FillColumnarIntegerValues(
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
+    i64 startIndex,
+    i64 valueCount,
+    NTableClient::EValueType valueType,
+    ui64 baseValue,
+    TRef data)
+{
+    // Only Int64 columns are flagged as zig-zag encoded.
+    const bool isSigned = valueType == NTableClient::EValueType::Int64;
+
+    auto& integerValues = column->Values.emplace();
+    integerValues.Data = data;
+    integerValues.BitWidth = 64;
+    integerValues.BaseValue = baseValue;
+    integerValues.ZigZagEncoded = isSigned;
+
+    column->ValueCount = valueCount;
+    column->StartIndex = startIndex;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Converts one integer column of a row set into a columnar batch column.
+/*!
+ *  TValue is i64 or ui64. Values are collected as ui64 (signed ones are zig-zag
+ *  encoded first), rebased against the smallest non-null encoded value and
+ *  returned together with a null bitmap.
+ */
+template <class TValue>
+class TIntegerColumnConverter
+    : public IColumnConverter
+{
+public:
+    static_assert(std::is_integral_v<TValue>);
+
+    TIntegerColumnConverter(
+        int columnIndex,
+        NTableClient::EValueType valueType,
+        NTableClient::TColumnSchema columnSchema)
+        : ColumnIndex_(columnIndex)
+        , ColumnSchema_(std::move(columnSchema))
+        , ValueType_(valueType)
+    { }
+
+    //! Builds a single column holding the values found at |ColumnIndex_| of every row.
+    /*!
+     *  A missing value (null pointer) and an explicit Null value are both
+     *  reported as null in the resulting bitmap.
+     */
+    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
+    {
+        Reset();
+        AddValues(rowsValues);
+
+        // Store values relative to the smallest non-null one. With no non-null
+        // values there is nothing to rebase, so the base is simply zero.
+        const ui64 baseValue = HasNonNullValue_ ? MinValue_ : 0;
+        for (i64 index = 0; index < std::ssize(Values_); ++index) {
+            if (!NullBitmap_[index]) {
+                Values_[index] -= baseValue;
+            }
+        }
+
+        auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>();
+        auto valuesRef = TSharedRef::MakeCopy<TConverterTag>(TRef(Values_.data(), sizeof(ui64) * Values_.size()));
+        auto column = std::make_shared<TBatchColumn>();
+
+        FillColumnarIntegerValues(
+            column.get(),
+            0,
+            RowCount_,
+            ValueType_,
+            baseValue,
+            valuesRef);
+
+        FillColumnarNullBitmap(
+            column.get(),
+            0,
+            RowCount_,
+            nullBitmapRef);
+
+        column->Type = ColumnSchema_.LogicalType();
+        column->Id = ColumnIndex_;
+
+        // The owner keeps alive the buffers the column only refers to.
+        TOwningColumn owner = {
+            .Column = std::move(column),
+            .NullBitmap = std::move(nullBitmapRef),
+            .ValueBuffer = std::move(valuesRef),
+        };
+
+        // |owner| is copied rather than moved on purpose: the second initializer still reads it.
+        return {{owner}, owner.Column.get()};
+    }
+
+
+private:
+    const int ColumnIndex_;
+    const NTableClient::TColumnSchema ColumnSchema_;
+    const NTableClient::EValueType ValueType_;
+
+    i64 RowCount_ = 0;
+    TBitmapOutput NullBitmap_;
+    std::vector<ui64> Values_;
+
+    //! Smallest encoded non-null value seen so far; meaningful only if |HasNonNullValue_| is set.
+    ui64 MinValue_ = 0;
+    bool HasNonNullValue_ = false;
+
+    //! Drops everything accumulated by the previous |Convert| call.
+    void Reset()
+    {
+        Values_.clear();
+        RowCount_ = 0;
+        MinValue_ = 0;
+        HasNonNullValue_ = false;
+        // Flushing is used here to empty the bitmap; the result is discarded.
+        NullBitmap_.Flush<TConverterTag>();
+    }
+
+    //! Appends the encoded value (or a null marker) of every row and tracks the minimum.
+    void AddValues(const std::vector<TUnversionedRowValues>& rowsValues)
+    {
+        Values_.reserve(rowsValues.size());
+        for (const auto& rowValues : rowsValues) {
+            auto value = rowValues[ColumnIndex_];
+            const bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null;
+            // Null slots keep a zero placeholder so that indexes stay aligned with rows.
+            ui64 data = 0;
+            if (!isNull) {
+                data = EncodeValue(GetValue<TValue>(*value));
+                if (!HasNonNullValue_ || data < MinValue_) {
+                    MinValue_ = data;
+                }
+                HasNonNullValue_ = true;
+            }
+            Values_.push_back(data);
+            NullBitmap_.Append(isNull);
+            ++RowCount_;
+        }
+    }
+};
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates the converter for signed 64-bit integer columns.
+IColumnConverterPtr CreateInt64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+{
+    using TConverter = TIntegerColumnConverter<i64>;
+    constexpr auto valueType = NTableClient::EValueType::Int64;
+    return std::make_unique<TConverter>(columnIndex, valueType, columnSchema);
+}
+
+
+//! Creates the converter for unsigned 64-bit integer columns.
+IColumnConverterPtr CreateUint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
+{
+    using TConverter = TIntegerColumnConverter<ui64>;
+    constexpr auto valueType = NTableClient::EValueType::Uint64;
+    return std::make_unique<TConverter>(columnIndex, valueType, columnSchema);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/integer_column_converter.h b/yt/yt/library/column_converters/integer_column_converter.h
new file mode 100644
index 0000000000..99b9d86342
--- /dev/null
+++ b/yt/yt/library/column_converters/integer_column_converter.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "column_converter.h"
+
+#include <yt/yt/client/table_client/public.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a converter for a signed 64-bit integer column located at |columnIndex|.
+IColumnConverterPtr CreateInt64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
+
+//! Creates a converter for an unsigned 64-bit integer column located at |columnIndex|.
+IColumnConverterPtr CreateUint64ColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/null_column_converter.cpp b/yt/yt/library/column_converters/null_column_converter.cpp
new file mode 100644
index 0000000000..d07ab24ceb
--- /dev/null
+++ b/yt/yt/library/column_converters/null_column_converter.cpp
@@ -0,0 +1,49 @@
+#include "null_column_converter.h"
+
+#include <yt/yt/client/table_client/logical_type.h>
+
+namespace NYT::NColumnConverters {
+
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Converter for columns of the Null type: the resulting column carries no buffers,
+//! only its id, logical type and the number of rows.
+class TNullColumnWriterConverter
+    : public IColumnConverter
+{
+public:
+    // Explicit so that an int never silently converts into a converter.
+    explicit TNullColumnWriterConverter(int columnIndex)
+        : ColumnIndex_(columnIndex)
+    { }
+
+    //! Produces a value-less column whose length equals the number of rows.
+    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
+    {
+        auto rowCount = rowsValues.size();
+
+        auto column = std::make_shared<TBatchColumn>();
+
+        column->Id = ColumnIndex_;
+        column->Type = SimpleLogicalType(ESimpleLogicalValueType::Null);
+        column->ValueCount = rowCount;
+
+        TOwningColumn owner = {
+            .Column = std::move(column),
+        };
+
+        // |owner| is copied rather than moved on purpose: the second initializer still reads it.
+        return {{owner}, owner.Column.get()};
+    }
+
+private:
+    const int ColumnIndex_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates the converter used for Null-typed columns.
+IColumnConverterPtr CreateNullConverter(int columnIndex)
+{
+    using TConverter = TNullColumnWriterConverter;
+    return std::make_unique<TConverter>(columnIndex);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/client/unittests/yson_helpers.h b/yt/yt/library/column_converters/null_column_converter.h
index 3cab460345..a8f97c84a1 100644
--- a/yt/yt/client/unittests/yson_helpers.h
+++ b/yt/yt/library/column_converters/null_column_converter.h
@@ -1,13 +1,13 @@
#pragma once
-#include <util/generic/string.h>
+#include "column_converter.h"
-namespace NYT {
+namespace NYT::NColumnConverters {
////////////////////////////////////////////////////////////////////////////////
-TString CanonizeYson(TStringBuf yson);
+IColumnConverterPtr CreateNullConverter(int columnIndex);
////////////////////////////////////////////////////////////////////////////////
-} // namespace NYT \ No newline at end of file
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/string_column_converter.cpp b/yt/yt/library/column_converters/string_column_converter.cpp
new file mode 100644
index 0000000000..c8a4354c47
--- /dev/null
+++ b/yt/yt/library/column_converters/string_column_converter.cpp
@@ -0,0 +1,375 @@
+#include "string_column_converter.h"
+
+#include "helpers.h"
+
+#include <yt/yt/client/table_client/schema.h>
+#include <yt/yt/client/table_client/unversioned_row.h>
+
+#include <yt/yt/core/misc/bit_packed_unsigned_vector.h>
+
+#include <library/cpp/yt/string/string_builder.h>
+
+namespace NYT::NColumnConverters {
+
+using namespace NTableClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+//! Describes |column| as a string column: 32-bit zig-zag encoded offsets plus raw string bytes.
+void FillColumnarStringValues(
+    NTableClient::IUnversionedColumnarRowBatch::TColumn* column,
+    i64 startIndex,
+    i64 valueCount,
+    ui32 avgLength,
+    TRef offsets,
+    TRef stringData)
+{
+    // String payload.
+    auto& stringPart = column->Strings.emplace();
+    stringPart.Data = stringData;
+    stringPart.AvgLength = avgLength;
+
+    // Offsets into the payload.
+    auto& offsetPart = column->Values.emplace();
+    offsetPart.Data = offsets;
+    offsetPart.ZigZagEncoded = true;
+    offsetPart.BitWidth = 32;
+
+    column->ValueCount = valueCount;
+    column->StartIndex = startIndex;
+}
+
+//! A captured value is null iff its view has no data pointer (an empty string still has one).
+bool IsValueNull(TStringBuf lhs)
+{
+    const char* bytes = lhs.data();
+    return bytes == nullptr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+//! Converts one string-like column (string, any or composite) of a row set into a
+//! batch column, choosing between direct and dictionary encoding by estimated size.
+/*!
+ *  All captured values live back to back in |DirectBuffer_|; |Values_| and
+ *  |Dictionary_| only hold views into it. The buffer is therefore sized once,
+ *  before the first value is captured, so that it never gets regrown (and the
+ *  views never dangle) while a batch is being collected.
+ */
+template <EValueType ValueType>
+class TStringConverter
+    : public IColumnConverter
+{
+public:
+    TStringConverter(
+        int columnIndex,
+        const TColumnSchema& columnSchema)
+        : ColumnIndex_(columnIndex)
+        , ColumnSchema_(columnSchema)
+    { }
+
+    //! Builds the column(s) for the values found at |ColumnIndex_| of every row.
+    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
+    {
+        Reset();
+        AddValues(rowsValues);
+        return GetColumns();
+    }
+
+private:
+    const int ColumnIndex_;
+    const TColumnSchema ColumnSchema_;
+
+    ui32 RowCount_ = 0;
+    ui64 AllStringsSize_ = 0;
+    ui64 DictionaryByteSize_ = 0;
+
+    //! One view per row; a view without a data pointer stands for a null value.
+    std::vector<TStringBuf> Values_;
+    //! Distinct value -> 1-based id, assigned in order of first appearance.
+    THashMap<TStringBuf, ui32> Dictionary_;
+    //! Backing storage for every view above.
+    TStringBuilder DirectBuffer_;
+
+    //! Drops everything accumulated by the previous |Convert| call.
+    void Reset()
+    {
+        AllStringsSize_ = 0;
+        RowCount_ = 0;
+        DictionaryByteSize_ = 0;
+
+        DirectBuffer_.Reset();
+        Values_.clear();
+        Dictionary_.clear();
+    }
+
+    //! Builds a bitmap with one bit per row, set for null values.
+    TSharedRef GetDirectDenseNullBitmap() const
+    {
+        TBitmapOutput nullBitmap(Values_.size());
+
+        for (auto value : Values_) {
+            nullBitmap.Append(IsValueNull(value));
+        }
+
+        return nullBitmap.Flush<TConverterTag>();
+    }
+
+    //! Returns, for every row, the end offset of its value within the direct buffer.
+    std::vector<ui32> GetDirectDenseOffsets() const
+    {
+        std::vector<ui32> offsets;
+        offsets.reserve(Values_.size());
+
+        ui32 offset = 0;
+        for (auto value : Values_) {
+            offset += value.length();
+            offsets.push_back(offset);
+        }
+
+        return offsets;
+    }
+
+    //! Emits a single column: encoded offsets plus a copy of the whole direct buffer.
+    TConvertedColumn GetDirectColumn(TSharedRef nullBitmap)
+    {
+        auto offsets = GetDirectDenseOffsets();
+
+        // Save offsets as diff from expected.
+        ui32 expectedLength;
+        ui32 maxDiff;
+        PrepareDiffFromExpected(&offsets, &expectedLength, &maxDiff);
+
+        auto directData = DirectBuffer_.GetBuffer();
+
+        auto offsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(offsets.data(), sizeof(ui32) * offsets.size()));
+        auto directDataPtr = TSharedRef::MakeCopy<TConverterTag>(TRef(directData.data(), directData.size()));
+        auto column = std::make_shared<TBatchColumn>();
+
+        FillColumnarStringValues(
+            column.get(),
+            0,
+            RowCount_,
+            expectedLength,
+            TRef(offsetsRef),
+            TRef(directDataPtr));
+
+        FillColumnarNullBitmap(
+            column.get(),
+            0,
+            RowCount_,
+            TRef(nullBitmap));
+
+        column->Type = ColumnSchema_.LogicalType();
+        column->Id = ColumnIndex_;
+
+        // The owner keeps alive the buffers the column only refers to.
+        TOwningColumn owner = {
+            .Column = std::move(column),
+            .NullBitmap = std::move(nullBitmap),
+            .ValueBuffer = std::move(offsetsRef),
+            .StringBuffer = std::move(directDataPtr),
+        };
+        return {{owner}, owner.Column.get()};
+    }
+
+    //! Emits two columns: the primary one with value ids and the dictionary with distinct strings.
+    TConvertedColumn GetDictionaryColumn()
+    {
+        auto dictionaryData = TSharedMutableRef::Allocate<TConverterTag>(DictionaryByteSize_, {.InitializeStorage = false});
+
+        std::vector<ui32> dictionaryOffsets;
+        dictionaryOffsets.reserve(Dictionary_.size());
+
+        std::vector<ui32> ids;
+        ids.reserve(Values_.size());
+
+        ui32 dictionarySize = 0;
+        ui32 dictionaryOffset = 0;
+        for (auto value : Values_) {
+            if (IsValueNull(value)) {
+                // Id 0 is reserved for null.
+                ids.push_back(0);
+                continue;
+            }
+
+            ui32 id = GetOrCrash(Dictionary_, value);
+            ids.push_back(id);
+
+            // Ids were handed out in order of first appearance, so an id above the
+            // number of strings written so far marks the first occurrence of a value.
+            if (id > dictionarySize) {
+                std::memcpy(
+                    dictionaryData.Begin() + dictionaryOffset,
+                    value.data(),
+                    value.length());
+                dictionaryOffset += value.length();
+                dictionaryOffsets.push_back(dictionaryOffset);
+                ++dictionarySize;
+            }
+        }
+
+        YT_VERIFY(dictionaryOffset == DictionaryByteSize_);
+
+        // 1. Value ids.
+        auto idsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(ids.data(), sizeof(ui32) * ids.size()));
+
+        // 2. Dictionary offsets.
+        ui32 expectedLength;
+        ui32 maxDiff;
+        PrepareDiffFromExpected(&dictionaryOffsets, &expectedLength, &maxDiff);
+        auto dictionaryOffsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(dictionaryOffsets.data(), sizeof(ui32) * dictionaryOffsets.size()));
+
+        auto primaryColumn = std::make_shared<TBatchColumn>();
+        auto dictionaryColumn = std::make_shared<TBatchColumn>();
+
+        FillColumnarStringValues(
+            dictionaryColumn.get(),
+            0,
+            dictionaryOffsets.size(),
+            expectedLength,
+            TRef(dictionaryOffsetsRef),
+            dictionaryData);
+
+        // NOTE(review): |primaryColumn->Type| has not been assigned yet at this point;
+        // both column types are set explicitly right after the call. Confirm whether
+        // FillColumnarDictionary is expected to receive the schema type instead.
+        FillColumnarDictionary(
+            primaryColumn.get(),
+            dictionaryColumn.get(),
+            NTableClient::IUnversionedColumnarRowBatch::GenerateDictionaryId(),
+            primaryColumn->Type,
+            0,
+            RowCount_,
+            idsRef);
+
+        dictionaryColumn->Type = ColumnSchema_.LogicalType();
+        primaryColumn->Type = ColumnSchema_.LogicalType();
+        primaryColumn->Id = ColumnIndex_;
+
+        TOwningColumn dictOwner = {
+            .Column = std::move(dictionaryColumn),
+            .ValueBuffer = std::move(dictionaryOffsetsRef),
+            .StringBuffer = std::move(dictionaryData),
+        };
+
+        TOwningColumn primeOwner = {
+            .Column = std::move(primaryColumn),
+            .ValueBuffer = std::move(idsRef),
+        };
+
+        return {{primeOwner, dictOwner}, primeOwner.Column.get()};
+    }
+
+    //! Picks the cheapest encoding (the first one on ties) and builds the columns for it.
+    TConvertedColumn GetColumns()
+    {
+        auto costs = GetEncodingMethodsCosts();
+
+        // The position of the minimum within |costs| is the enum value of the encoding.
+        auto minElement = std::min_element(costs.begin(), costs.end());
+        auto type = EUnversionedStringSegmentType(std::distance(costs.begin(), minElement));
+
+        switch (type) {
+
+            case EUnversionedStringSegmentType::DirectDense:
+                return GetDirectColumn(GetDirectDenseNullBitmap());
+
+            case EUnversionedStringSegmentType::DictionaryDense:
+                return GetDictionaryColumn();
+
+            default:
+                YT_ABORT();
+        }
+    }
+
+    //! Returns the estimated byte size of every supported encoding.
+    TEnumIndexedVector<EUnversionedStringSegmentType, ui64> GetEncodingMethodsCosts() const
+    {
+        TEnumIndexedVector<EUnversionedStringSegmentType, ui64> costs;
+        for (auto type : TEnumTraits<EUnversionedStringSegmentType>::GetDomainValues()) {
+            costs[type] = GetSpecificEncodingMethodCosts(type);
+        }
+        return costs;
+    }
+
+    ui64 GetSpecificEncodingMethodCosts(EUnversionedStringSegmentType type) const
+    {
+        switch (type) {
+            case EUnversionedStringSegmentType::DictionaryDense:
+                return GetDictionaryByteSize();
+
+            case EUnversionedStringSegmentType::DirectDense:
+                return GetDirectByteSize();
+
+            default:
+                YT_ABORT();
+        }
+    }
+
+    //! Captures the value of every row into the direct buffer.
+    void AddValues(const std::vector<TUnversionedRowValues>& rowsValues)
+    {
+        // First pass: reserve room for all values at once. Views captured below point
+        // into |DirectBuffer_|, so it must not be regrown while they are collected.
+        ui64 totalCapacity = 0;
+        for (const auto& rowValues : rowsValues) {
+            auto unversionedValue = rowValues[ColumnIndex_];
+            YT_VERIFY(unversionedValue != nullptr);
+            totalCapacity += GetValueCapacity(*unversionedValue);
+        }
+        DirectBuffer_.Preallocate(totalCapacity);
+
+        // Second pass: capture the values themselves.
+        Values_.reserve(rowsValues.size());
+        for (const auto& rowValues : rowsValues) {
+            auto unversionedValue = rowValues[ColumnIndex_];
+            auto value = CaptureValue(*unversionedValue);
+            Values_.push_back(value);
+            ++RowCount_;
+        }
+    }
+
+    ui64 GetDirectByteSize() const
+    {
+        return AllStringsSize_;
+    }
+
+    ui64 GetDictionaryByteSize() const
+    {
+        return DictionaryByteSize_ + Values_.size() * sizeof(ui32);
+    }
+
+    //! Returns how many bytes must be available in the buffer to capture |unversionedValue|.
+    i64 GetValueCapacity(const TUnversionedValue& unversionedValue) const
+    {
+        if (unversionedValue.Type == EValueType::Null) {
+            return 0;
+        }
+
+        return IsAnyOrComposite(ValueType) && !IsAnyOrComposite(unversionedValue.Type)
+            ? GetYsonSize(unversionedValue)
+            : static_cast<i64>(unversionedValue.Length);
+    }
+
+    //! Copies (or YSON-serializes) the value into the direct buffer and returns a view of it.
+    /*!
+     *  Null values yield a view without a data pointer. Every other value is also
+     *  registered in the dictionary and accounted for in the size estimates.
+     */
+    TStringBuf CaptureValue(const TUnversionedValue& unversionedValue)
+    {
+        if (unversionedValue.Type == EValueType::Null) {
+            return {};
+        }
+
+        auto valueCapacity = GetValueCapacity(unversionedValue);
+
+        // The space was reserved up front in AddValues, so this does not regrow the buffer.
+        char* buffer = DirectBuffer_.Preallocate(valueCapacity);
+        if (!buffer) {
+            // This means, that we reserved nothing, because all strings are either null or empty.
+            // To distinguish between null and empty, we set preallocated pointer to special value.
+            static char* const EmptyStringBase = reinterpret_cast<char*>(1);
+            buffer = EmptyStringBase;
+        }
+
+        auto start = buffer;
+
+        if (IsAnyOrComposite(ValueType) && !IsAnyOrComposite(unversionedValue.Type)) {
+            // Any non-any and non-null value convert to YSON.
+            buffer += WriteYson(buffer, unversionedValue);
+        } else {
+            std::memcpy(
+                buffer,
+                unversionedValue.Data.String,
+                unversionedValue.Length);
+            buffer += unversionedValue.Length;
+        }
+
+        auto value = TStringBuf(start, buffer);
+
+        YT_VERIFY(value.size() <= valueCapacity);
+
+        DirectBuffer_.Advance(value.size());
+
+        // A successful insertion means the value is new and adds to the dictionary payload.
+        if (Dictionary_.emplace(value, Dictionary_.size() + 1).second) {
+            DictionaryByteSize_ += value.size();
+        }
+        AllStringsSize_ += value.size();
+        return value;
+    }
+};
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates the converter instantiated for EValueType::String.
+IColumnConverterPtr CreateStringConverter(
+    int columnIndex,
+    const NTableClient::TColumnSchema& columnSchema)
+{
+    using TConverter = TStringConverter<EValueType::String>;
+    return std::make_unique<TConverter>(columnIndex, columnSchema);
+}
+
+//! Creates the converter instantiated for EValueType::Any.
+IColumnConverterPtr CreateAnyConverter(
+    int columnIndex,
+    const NTableClient::TColumnSchema& columnSchema)
+{
+    using TConverter = TStringConverter<EValueType::Any>;
+    return std::make_unique<TConverter>(columnIndex, columnSchema);
+}
+
+//! Creates the converter instantiated for EValueType::Composite.
+IColumnConverterPtr CreateCompositeConverter(
+    int columnIndex,
+    const NTableClient::TColumnSchema& columnSchema)
+{
+    using TConverter = TStringConverter<EValueType::Composite>;
+    return std::make_unique<TConverter>(columnIndex, columnSchema);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/string_column_converter.h b/yt/yt/library/column_converters/string_column_converter.h
new file mode 100644
index 0000000000..b9c3d2bdf7
--- /dev/null
+++ b/yt/yt/library/column_converters/string_column_converter.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "column_converter.h"
+
+#include <yt/yt/client/table_client/public.h>
+
+namespace NYT::NColumnConverters {
+
+////////////////////////////////////////////////////////////////////////////////
+
+IColumnConverterPtr CreateStringConverter( // Creates the converter instantiated for EValueType::String.
+ int columnIndex, // Position of the value within each row; also becomes the id of the resulting column.
+ const NTableClient::TColumnSchema& columnSchema); // Copied by the converter; its logical type is assigned to the resulting column.
+
+IColumnConverterPtr CreateAnyConverter( // Creates the converter instantiated for EValueType::Any; non-any, non-null values are written as YSON.
+ int columnIndex, // Position of the value within each row; also becomes the id of the resulting column.
+ const NTableClient::TColumnSchema& columnSchema); // Copied by the converter; its logical type is assigned to the resulting column.
+
+IColumnConverterPtr CreateCompositeConverter( // Creates the converter instantiated for EValueType::Composite; non-composite, non-null values are written as YSON.
+ int columnIndex, // Position of the value within each row; also becomes the id of the resulting column.
+ const NTableClient::TColumnSchema& columnSchema); // Copied by the converter; its logical type is assigned to the resulting column.
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NColumnConverters
diff --git a/yt/yt/library/column_converters/ya.make b/yt/yt/library/column_converters/ya.make
new file mode 100644
index 0000000000..55cd9f86c0
--- /dev/null
+++ b/yt/yt/library/column_converters/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ boolean_column_converter.cpp
+ column_converter.cpp
+ floating_point_column_converter.cpp
+ helpers.cpp
+ integer_column_converter.cpp
+ null_column_converter.cpp
+ string_column_converter.cpp
+)
+
+PEERDIR(
+ yt/yt/core
+)
+
+END()
diff --git a/yt/yt/library/formats/arrow_writer.cpp b/yt/yt/library/formats/arrow_writer.cpp
new file mode 100644
index 0000000000..b93e54cf31
--- /dev/null
+++ b/yt/yt/library/formats/arrow_writer.cpp
@@ -0,0 +1,1065 @@
+#include "arrow_writer.h"
+
+#include <yt/yt/client/arrow/fbs/Message.fbs.h>
+#include <yt/yt/client/arrow/fbs/Schema.fbs.h>
+
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/library/formats/schemaless_writer_adapter.h>
+
+#include <yt/yt/client/table_client/columnar.h>
+#include <yt/yt/client/table_client/logical_type.h>
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/public.h>
+#include <yt/yt/client/table_client/row_batch.h>
+#include <yt/yt/client/table_client/schema.h>
+
+#include <yt/yt/library/column_converters/column_converter.h>
+
+#include <yt/yt/core/concurrency/async_stream.h>
+#include <yt/yt/core/concurrency/public.h>
+
+#include <yt/yt/core/misc/blob_output.h>
+#include <yt/yt/core/misc/error.h>
+#include <yt/yt/core/misc/range.h>
+
+#include <vector>
+
+namespace NYT::NFormats {
+
+using namespace NTableClient;
+using namespace NComplexTypes;
+
+static const auto& Logger = FormatsLogger;
+
+using TBodyWriter = std::function<void(TMutableRef)>;
+using TBatchColumn = IUnversionedColumnarRowBatch::TColumn;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TTypedBatchColumn // A batch column paired with a logical type.
+{
+ const TBatchColumn* Column; // Raw pointer; this struct does not manage the column's lifetime.
+ TLogicalTypePtr Type; // NOTE(review): presumably the schema type of the column -- the serializers visible here read only Column.
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+constexpr i64 ArrowAlignment = 8; // Body offsets advance by buffer sizes rounded up to a multiple of this (see AddBuffer).
+
+//! Stores |str| in the flatbuffer under construction and returns its offset.
+flatbuffers::Offset<flatbuffers::String> SerializeString(
+    flatbuffers::FlatBufferBuilder* flatbufBuilder,
+    const TString& str)
+{
+    const char* bytes = str.data();
+    const auto byteCount = str.length();
+    return flatbufBuilder->CreateString(bytes, byteCount);
+}
+
+//! Maps the logical type of a column onto an Arrow flatbuffer type.
+/*!
+ *  Returns the Arrow type tag together with the offset of the serialized type
+ *  table. Throws if the column's simple type has no mapping below.
+ *
+ *  The schema is taken by const reference: it is only read, so there is no
+ *  reason to copy it on every call.
+ */
+std::tuple<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>> SerializeColumnType(
+    flatbuffers::FlatBufferBuilder* flatbufBuilder,
+    const TColumnSchema& schema)
+{
+    auto simpleType = CastToV1Type(schema.LogicalType()).first;
+    switch (simpleType) {
+        case ESimpleLogicalValueType::Null:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_Null,
+                org::apache::arrow::flatbuf::CreateNull(*flatbufBuilder)
+                    .Union());
+
+        // All integer widths share one Arrow type parameterized by bit width and signedness.
+        case ESimpleLogicalValueType::Int64:
+        case ESimpleLogicalValueType::Uint64:
+        case ESimpleLogicalValueType::Int8:
+        case ESimpleLogicalValueType::Uint8:
+        case ESimpleLogicalValueType::Int16:
+        case ESimpleLogicalValueType::Uint16:
+        case ESimpleLogicalValueType::Int32:
+        case ESimpleLogicalValueType::Uint32:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_Int,
+                org::apache::arrow::flatbuf::CreateInt(
+                    *flatbufBuilder,
+                    GetIntegralTypeBitWidth(simpleType),
+                    IsIntegralTypeSigned(simpleType))
+                    .Union());
+
+        case ESimpleLogicalValueType::Double:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_FloatingPoint,
+                org::apache::arrow::flatbuf::CreateFloatingPoint(
+                    *flatbufBuilder,
+                    org::apache::arrow::flatbuf::Precision_DOUBLE)
+                    .Union());
+
+        case ESimpleLogicalValueType::Boolean:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_Bool,
+                org::apache::arrow::flatbuf::CreateBool(*flatbufBuilder)
+                    .Union());
+
+        // Both are exported as opaque binary.
+        case ESimpleLogicalValueType::String:
+        case ESimpleLogicalValueType::Any:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_Binary,
+                org::apache::arrow::flatbuf::CreateBinary(*flatbufBuilder)
+                    .Union());
+
+        case ESimpleLogicalValueType::Utf8:
+            return std::make_tuple(
+                org::apache::arrow::flatbuf::Type_Utf8,
+                org::apache::arrow::flatbuf::CreateUtf8(*flatbufBuilder)
+                    .Union());
+
+        // TODO(babenko): the following types are not supported:
+        //   Date
+        //   Datetime
+        //   Interval
+        //   Timestamp
+
+        default:
+            THROW_ERROR_EXCEPTION("Column %v has type %Qlv that is not currently supported by Arrow encoder",
+                schema.GetDiagnosticNameString(),
+                simpleType);
+    }
+}
+
+//! True for string-like columns that are RLE-encoded over a value column without a dictionary.
+bool IsRleButNotDictionaryEncodedStringLikeColumn(const TBatchColumn& column)
+{
+    auto simpleType = CastToV1Type(column.Type).first;
+    if (!IsStringLikeType(simpleType)) {
+        return false;
+    }
+    if (!column.Rle) {
+        return false;
+    }
+    const auto* valueColumn = column.Rle->ValueColumn;
+    return !valueColumn->Dictionary;
+}
+
+//! True for columns that are RLE-encoded over a dictionary-encoded value column.
+bool IsRleAndDictionaryEncodedColumn(const TBatchColumn& column)
+{
+    if (!column.Rle) {
+        return false;
+    }
+    const auto* valueColumn = column.Rle->ValueColumn;
+    return static_cast<bool>(valueColumn->Dictionary);
+}
+
+//! True if the column is dictionary-encoded directly, through RLE, or is an
+//! RLE string-like column without a dictionary.
+bool IsDictionaryEncodedColumn(const TBatchColumn& column)
+{
+    if (column.Dictionary) {
+        return true;
+    }
+    if (IsRleAndDictionaryEncodedColumn(column)) {
+        return true;
+    }
+    return IsRleButNotDictionaryEncodedStringLikeColumn(column);
+}
+
+
+struct TRecordBatchBodyPart // One registered body buffer: its size and the callback that fills it.
+{
+ i64 Size; // Size as passed to AddBuffer, before alignment.
+ TBodyWriter Writer; // Callback that receives the destination memory as a TMutableRef.
+};
+
+//! Accumulates the field nodes, buffer descriptors and body writers of one record batch.
+struct TRecordBatchSerializationContext final
+{
+    explicit TRecordBatchSerializationContext(flatbuffers::FlatBufferBuilder* flatbufBuilder)
+        : FlatbufBuilder(flatbufBuilder)
+    { }
+
+    //! Registers a field node with the given length and null count.
+    void AddFieldNode(i64 length, i64 nullCount)
+    {
+        FieldNodes.emplace_back(length, nullCount);
+    }
+
+    //! Registers a body buffer of |size| bytes at the current offset; |writer| fills it later.
+    void AddBuffer(i64 size, TBodyWriter writer)
+    {
+        const i64 bufferOffset = CurrentBodyOffset;
+
+        YT_LOG_DEBUG("Buffer registered (Offset: %v, Size: %v)",
+            bufferOffset,
+            size);
+
+        Buffers.emplace_back(bufferOffset, size);
+        Parts.push_back(TRecordBatchBodyPart{size, std::move(writer)});
+
+        // The next buffer starts after this one, padded up to the Arrow alignment.
+        CurrentBodyOffset = bufferOffset + AlignUp<i64>(size, ArrowAlignment);
+    }
+
+    flatbuffers::FlatBufferBuilder* const FlatbufBuilder;
+
+    i64 CurrentBodyOffset = 0;
+    std::vector<org::apache::arrow::flatbuf::FieldNode> FieldNodes;
+    std::vector<org::apache::arrow::flatbuf::Buffer> Buffers;
+    std::vector<TRecordBatchBodyPart> Parts;
+};
+
+//! Reinterprets the bytes of |ref| as a mutable range of T.
+template <class T>
+TMutableRange<T> GetTypedValues(TMutableRef ref)
+{
+    auto* first = reinterpret_cast<T*>(ref.Begin());
+    auto* last = reinterpret_cast<T*>(ref.End());
+    return MakeMutableRange(first, last);
+}
+
+//! Registers what every column starts with: its field node and its validity buffer.
+/*!
+ *  Three cases are handled: no null bitmap at all, a plain null bitmap, and an
+ *  RLE column whose bitmap is expanded through the RLE indexes.
+ */
+void SerializeColumnPrologue(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    const bool hasNullBitmap =
+        column->NullBitmap ||
+        (column->Rle && column->Rle->ValueColumn->NullBitmap);
+
+    if (!hasNullBitmap) {
+        // No nulls: zero null count and an empty validity buffer.
+        context->AddFieldNode(
+            column->ValueCount,
+            0);
+
+        context->AddBuffer(
+            0,
+            [] (TMutableRef /*dstRef*/) {
+            });
+        return;
+    }
+
+    if (!column->Rle) {
+        // Plain bitmap: count the set bits and write their negation as validity bits.
+        context->AddFieldNode(
+            column->ValueCount,
+            CountOnesInBitmap(
+                column->NullBitmap->Data,
+                column->StartIndex,
+                column->StartIndex + column->ValueCount));
+
+        context->AddBuffer(
+            GetBitmapByteSize(column->ValueCount),
+            [column] (TMutableRef dstRef) {
+                CopyBitmapRangeToBitmapNegated(
+                    column->NullBitmap->Data,
+                    column->StartIndex,
+                    column->StartIndex + column->ValueCount,
+                    dstRef);
+            });
+        return;
+    }
+
+    // RLE column.
+    // NOTE(review): this path reads the value column's null bitmap even when only
+    // |column->NullBitmap| made the check above pass -- confirm that combination cannot occur.
+    const auto* valueColumn = column->Rle->ValueColumn;
+    auto rleIndexes = column->GetTypedValues<ui64>();
+
+    context->AddFieldNode(
+        column->ValueCount,
+        CountOnesInRleBitmap(
+            valueColumn->NullBitmap->Data,
+            rleIndexes,
+            column->StartIndex,
+            column->StartIndex + column->ValueCount));
+
+    context->AddBuffer(
+        GetBitmapByteSize(column->ValueCount),
+        [column, valueColumn, rleIndexes] (TMutableRef dstRef) {
+            BuildValidityBitmapFromRleNullBitmap(
+                valueColumn->NullBitmap->Data,
+                rleIndexes,
+                column->StartIndex,
+                column->StartIndex + column->ValueCount,
+                dstRef);
+        });
+}
+
+//! Serializes an RLE string-like column without a dictionary as a column of
+//! 32-bit dictionary indexes derived from the RLE indexes.
+void SerializeRleButNotDictionaryEncodedStringLikeColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    // RLE indexes are expected as plain 64-bit values.
+    YT_VERIFY(column->Values);
+    YT_VERIFY(column->Values->BitWidth == 64);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(!column->Values->ZigZagEncoded);
+
+    YT_LOG_DEBUG("Adding RLE but not dictionary-encoded string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount);
+
+    SerializeColumnPrologue(typedColumn, context);
+
+    auto rleIndexes = column->GetTypedValues<ui64>();
+    const auto indexesByteSize = sizeof(ui32) * column->ValueCount;
+
+    context->AddBuffer(
+        indexesByteSize,
+        [column, rleIndexes] (TMutableRef dstRef) {
+            auto dstIndexes = GetTypedValues<ui32>(dstRef);
+            BuildIotaDictionaryIndexesFromRleIndexes(
+                rleIndexes,
+                column->StartIndex,
+                column->StartIndex + column->ValueCount,
+                dstIndexes);
+        });
+}
+
+//! Serializes a dictionary-encoded column: a validity bitmap and 32-bit indexes,
+//! both derived from ids in which zero stands for null.
+void SerializeDictionaryColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    // Ids are expected as plain 32-bit values with zero reserved for null.
+    YT_VERIFY(column->Values);
+    YT_VERIFY(column->Dictionary->ZeroMeansNull);
+    YT_VERIFY(column->Values->BitWidth == 32);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(!column->Values->ZigZagEncoded);
+
+    YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount,
+        column->Rle.has_value());
+
+    auto sourceIndexes = column->GetRelevantTypedValues<ui32>();
+
+    // Field node.
+    context->AddFieldNode(
+        column->ValueCount,
+        CountNullsInDictionaryIndexesWithZeroNull(sourceIndexes));
+
+    // Validity bitmap.
+    context->AddBuffer(
+        GetBitmapByteSize(column->ValueCount),
+        [sourceIndexes] (TMutableRef dstRef) {
+            BuildValidityBitmapFromDictionaryIndexesWithZeroNull(
+                sourceIndexes,
+                dstRef);
+        });
+
+    // Dictionary indexes.
+    context->AddBuffer(
+        sizeof(ui32) * column->ValueCount,
+        [sourceIndexes] (TMutableRef dstRef) {
+            auto dstIndexes = GetTypedValues<ui32>(dstRef);
+            BuildDictionaryIndexesFromDictionaryIndexesWithZeroNull(
+                sourceIndexes,
+                dstIndexes);
+        });
+}
+
+//! Serializes a column that is RLE-encoded over a dictionary-encoded value column:
+//! a validity bitmap and 32-bit indexes, both expanded through the RLE indexes.
+void SerializeRleDictionaryColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    // RLE indexes: plain 64-bit values.
+    YT_VERIFY(column->Values);
+    YT_VERIFY(column->Values->BitWidth == 64);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(!column->Values->ZigZagEncoded);
+    // Dictionary ids of the value column: plain 32-bit values with zero reserved for null.
+    YT_VERIFY(column->Rle->ValueColumn->Dictionary->ZeroMeansNull);
+    YT_VERIFY(column->Rle->ValueColumn->Values->BitWidth == 32);
+    YT_VERIFY(column->Rle->ValueColumn->Values->BaseValue == 0);
+    YT_VERIFY(!column->Rle->ValueColumn->Values->ZigZagEncoded);
+
+    YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount,
+        column->Rle.has_value());
+
+    auto sourceIndexes = column->Rle->ValueColumn->GetTypedValues<ui32>();
+    auto runIndexes = column->GetTypedValues<ui64>();
+
+    // Field node.
+    context->AddFieldNode(
+        column->ValueCount,
+        CountNullsInRleDictionaryIndexesWithZeroNull(
+            sourceIndexes,
+            runIndexes,
+            column->StartIndex,
+            column->StartIndex + column->ValueCount));
+
+    // Validity bitmap.
+    context->AddBuffer(
+        GetBitmapByteSize(column->ValueCount),
+        [column, sourceIndexes, runIndexes] (TMutableRef dstRef) {
+            BuildValidityBitmapFromRleDictionaryIndexesWithZeroNull(
+                sourceIndexes,
+                runIndexes,
+                column->StartIndex,
+                column->StartIndex + column->ValueCount,
+                dstRef);
+        });
+
+    // Dictionary indexes.
+    context->AddBuffer(
+        sizeof(ui32) * column->ValueCount,
+        [column, sourceIndexes, runIndexes] (TMutableRef dstRef) {
+            auto dstIndexes = GetTypedValues<ui32>(dstRef);
+            BuildDictionaryIndexesFromRleDictionaryIndexesWithZeroNull(
+                sourceIndexes,
+                runIndexes,
+                column->StartIndex,
+                column->StartIndex + column->ValueCount,
+                dstIndexes);
+        });
+}
+
+void SerializeIntegerColumn( // Registers the prologue and one value buffer holding the column decoded to |simpleType|.
+ const TTypedBatchColumn& typedColumn,
+ ESimpleLogicalValueType simpleType, // Selects the element type (and hence the element size) of the output buffer.
+ TRecordBatchSerializationContext* context)
+{
+ const auto* column = typedColumn.Column;
+ YT_VERIFY(column->Values);
+
+ YT_LOG_DEBUG("Adding integer column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
+ column->Id,
+ column->StartIndex,
+ column->ValueCount,
+ column->Rle.has_value());
+
+ SerializeColumnPrologue(typedColumn, context);
+
+ context->AddBuffer(
+ column->ValueCount * GetIntegralTypeByteSize(simpleType),
+ [=] (TMutableRef dstRef) { // Runs later, when the body is written; captures |column| and |simpleType| by value.
+ const auto* valueColumn = column->Rle // For RLE columns the actual values live in the RLE value column.
+ ? column->Rle->ValueColumn
+ : column;
+ auto values = valueColumn->GetTypedValues<ui64>();
+
+ auto rleIndexes = column->Rle // Empty range when the column is not RLE-encoded.
+ ? column->GetTypedValues<ui64>()
+ : TRange<ui64>();
+
+ switch (simpleType) { // One case per integer type, generated by XX: decode the values and store them as cppType.
+#define XX(cppType, ytType) \
+ case ESimpleLogicalValueType::ytType: { \
+ auto dstValues = GetTypedValues<cppType>(dstRef); \
+ auto* currentOutput = dstValues.Begin(); \
+ DecodeIntegerVector( \
+ column->StartIndex, \
+ column->StartIndex + column->ValueCount, \
+ valueColumn->Values->BaseValue, \
+ valueColumn->Values->ZigZagEncoded, \
+ TRange<ui32>(), \
+ rleIndexes, \
+ [&] (auto index) { \
+ return values[index]; \
+ }, \
+ [&] (auto value) { \
+ *currentOutput++ = value; \
+ }); \
+ break; \
+ }
+
+ XX(i8, Int8)
+ XX(i16, Int16)
+ XX(i32, Int32)
+ XX(i64, Int64)
+ XX(ui8, Uint8)
+ XX(ui16, Uint16)
+ XX(ui32, Uint32)
+ XX(ui64, Uint64)
+
+#undef XX
+
+ default: // Any non-integer type reaching this point is reported when the buffer is written.
+ THROW_ERROR_EXCEPTION("Integer column %v has unexpected type %Qlv",
+ typedColumn.Column->Id,
+ simpleType);
+ }
+ });
+}
+
+//! Serializes a column of doubles: the prologue followed by the raw 64-bit values.
+/*!
+ *  The values are required to be stored as plain 64-bit words (no base value,
+ *  no zig-zag encoding), so the relevant slice is copied as is.
+ */
+void SerializeDoubleColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+    YT_VERIFY(column->Values);
+    YT_VERIFY(column->Values->BitWidth == 64);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(!column->Values->ZigZagEncoded);
+
+    // The format string now has a placeholder for each of the four arguments;
+    // previously the RLE flag was passed without a matching "%v".
+    YT_LOG_DEBUG("Adding double column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount,
+        column->Rle.has_value());
+
+    SerializeColumnPrologue(typedColumn, context);
+
+    context->AddBuffer(
+        column->ValueCount * sizeof(double),
+        [=] (TMutableRef dstRef) {
+            auto relevantValues = column->GetRelevantTypedValues<double>();
+            ::memcpy(
+                dstRef.Begin(),
+                relevantValues.Begin(),
+                column->ValueCount * sizeof(double));
+        });
+}
+
+//! Serializes a directly encoded string-like column: the prologue, a buffer of
+//! ValueCount + 1 decoded 32-bit offsets and a buffer with the string bytes.
+void SerializeStringLikeColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    // Offsets are expected as 32-bit zig-zag encoded values; RLE is not handled here.
+    YT_VERIFY(column->Values);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(column->Values->BitWidth == 32);
+    YT_VERIFY(column->Values->ZigZagEncoded);
+    YT_VERIFY(column->Strings);
+    YT_VERIFY(column->Strings->AvgLength);
+    YT_VERIFY(!column->Rle);
+
+    const auto firstIndex = column->StartIndex;
+    const auto lastIndex = firstIndex + column->ValueCount;
+    const auto stringData = column->Strings->Data;
+    const auto avgLength = *column->Strings->AvgLength;
+
+    // Byte range of the string payload that belongs to [firstIndex, lastIndex).
+    const auto encodedOffsets = column->GetTypedValues<ui32>();
+    const auto firstOffset = DecodeStringOffset(encodedOffsets, avgLength, firstIndex);
+    const auto lastOffset = DecodeStringOffset(encodedOffsets, avgLength, lastIndex);
+    const auto stringsSize = lastOffset - firstOffset;
+
+    YT_LOG_DEBUG("Adding string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v, StartOffset: %v, EndOffset: %v, StringsSize: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount,
+        firstOffset,
+        lastOffset,
+        stringsSize);
+
+    SerializeColumnPrologue(typedColumn, context);
+
+    // Offsets.
+    context->AddBuffer(
+        sizeof(i32) * (column->ValueCount + 1),
+        [encodedOffsets, avgLength, firstIndex, lastIndex] (TMutableRef dstRef) {
+            auto dstOffsets = GetTypedValues<ui32>(dstRef);
+            DecodeStringOffsets(
+                encodedOffsets,
+                avgLength,
+                firstIndex,
+                lastIndex,
+                dstOffsets);
+        });
+
+    // String bytes.
+    context->AddBuffer(
+        stringsSize,
+        [stringData, firstOffset, stringsSize] (TMutableRef dstRef) {
+            const auto* source = stringData.Begin() + firstOffset;
+            ::memcpy(
+                dstRef.Begin(),
+                source,
+                stringsSize);
+        });
+}
+
+// Serializes a boolean column: after the prologue a single bitmap buffer is added,
+// filled by copying bits [StartIndex, StartIndex + ValueCount) of the column's value bitmap.
+void SerializeBooleanColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+    YT_VERIFY(column->Values);
+    YT_VERIFY(!column->Values->ZigZagEncoded);
+    YT_VERIFY(column->Values->BaseValue == 0);
+    YT_VERIFY(column->Values->BitWidth == 1);
+
+    YT_LOG_DEBUG("Adding boolean column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
+        column->Id,
+        column->StartIndex,
+        column->ValueCount);
+
+    SerializeColumnPrologue(typedColumn, context);
+
+    auto bitmapByteSize = GetBitmapByteSize(column->ValueCount);
+    context->AddBuffer(
+        bitmapByteSize,
+        [column] (TMutableRef dstRef) {
+            auto firstBit = column->StartIndex;
+            auto lastBit = column->StartIndex + column->ValueCount;
+            CopyBitmapRangeToBitmap(column->Values->Data, firstBit, lastBit, dstRef);
+        });
+}
+
+// Dispatches serialization of a single column.
+// Encoding-specific cases (RLE string-like, dictionary, RLE over dictionary) are tried first;
+// otherwise the serializer is picked by the column's simple logical type.
+// Throws if the type is not one of the supported kinds.
+void SerializeColumn(
+    const TTypedBatchColumn& typedColumn,
+    TRecordBatchSerializationContext* context)
+{
+    const auto* column = typedColumn.Column;
+
+    if (IsRleButNotDictionaryEncodedStringLikeColumn(*typedColumn.Column)) {
+        SerializeRleButNotDictionaryEncodedStringLikeColumn(typedColumn, context);
+        return;
+    }
+
+    if (column->Dictionary) {
+        SerializeDictionaryColumn(typedColumn, context);
+        return;
+    }
+
+    if (column->Rle && column->Rle->ValueColumn->Dictionary) {
+        SerializeRleDictionaryColumn(typedColumn, context);
+        return;
+    }
+
+    auto simpleType = CastToV1Type(typedColumn.Type).first;
+
+    if (IsIntegralType(simpleType)) {
+        SerializeIntegerColumn(typedColumn, simpleType, context);
+        return;
+    }
+    if (simpleType == ESimpleLogicalValueType::Double) {
+        SerializeDoubleColumn(typedColumn, context);
+        return;
+    }
+    if (IsStringLikeType(simpleType)) {
+        SerializeStringLikeColumn(typedColumn, context);
+        return;
+    }
+    if (simpleType == ESimpleLogicalValueType::Boolean) {
+        SerializeBooleanColumn(typedColumn, context);
+        return;
+    }
+    if (simpleType == ESimpleLogicalValueType::Null) {
+        // No buffers are allocated for null columns.
+        return;
+    }
+
+    THROW_ERROR_EXCEPTION("Column %v has unexpected type %Qlv",
+        typedColumn.Column->Id,
+        simpleType);
+}
+
+// Builds the RecordBatch flatbuffer table for |typedColumns| inside |flatbufBuilder|.
+// Returns a tuple of: the RecordBatch offset, the total body size, and a writer callback
+// that fills a destination buffer of exactly that size with all registered parts,
+// each part starting at an ArrowAlignment-aligned position.
+auto SerializeRecordBatch(
+    flatbuffers::FlatBufferBuilder* flatbufBuilder,
+    int length,
+    TRange<TTypedBatchColumn> typedColumns)
+{
+    auto context = New<TRecordBatchSerializationContext>(flatbufBuilder);
+
+    for (const auto& typedColumn : typedColumns) {
+        SerializeColumn(typedColumn, context.Get());
+    }
+
+    auto nodesOffset = flatbufBuilder->CreateVectorOfStructs(context->FieldNodes);
+    auto buffersOffset = flatbufBuilder->CreateVectorOfStructs(context->Buffers);
+
+    auto batchOffset = org::apache::arrow::flatbuf::CreateRecordBatch(
+        *flatbufBuilder,
+        length,
+        nodesOffset,
+        buffersOffset);
+
+    auto bodySize = context->CurrentBodyOffset;
+
+    auto bodyWriter = [context = std::move(context)] (TMutableRef dstRef) {
+        char* cursor = dstRef.Begin();
+        for (const auto& part : context->Parts) {
+            part.Writer(TMutableRef(cursor, cursor + part.Size));
+            cursor += AlignUp<i64>(part.Size, ArrowAlignment);
+        }
+        // The parts must cover the destination buffer exactly.
+        YT_VERIFY(cursor == dstRef.End());
+    };
+
+    return std::make_tuple(
+        batchOffset,
+        bodySize,
+        std::move(bodyWriter));
+}
+///////////////////////////////////////////////////////////////////////////////
+
+class TArrowWriter
+ : public TSchemalessFormatWriterBase
+{
+public:
+    // Builds the writer and precomputes a column schema for every name table entry,
+    // using the first of |tableSchemas| as the source of column types.
+    TArrowWriter(
+        TNameTablePtr nameTable,
+        const std::vector<NTableClient::TTableSchemaPtr>& tableSchemas,
+        NConcurrency::IAsyncOutputStreamPtr output,
+        bool enableContextSaving,
+        TControlAttributesConfigPtr controlAttributesConfig,
+        int keyColumnCount)
+        : TSchemalessFormatWriterBase(
+            std::move(nameTable),
+            std::move(output),
+            enableContextSaving,
+            std::move(controlAttributesConfig),
+            keyColumnCount)
+    {
+        YT_VERIFY(!tableSchemas.empty());
+
+        auto primarySchema = tableSchemas[0];
+        auto nameCount = NameTable_->GetSize();
+
+        for (int id = 0; id < nameCount; ++id) {
+            ColumnSchemas_.push_back(GetColumnSchema(primarySchema, id));
+        }
+    }
+
+private:
+    // Drops the per-batch state: row counter, typed columns and queued messages.
+    void Reset()
+    {
+        NumberOfRows_ = 0;
+        TypedColumns_.clear();
+        Messages_.clear();
+    }
+
+    // Row-wise entry point: converts |rows| into columns and encodes them as one batch.
+    void DoWrite(TRange<TUnversionedRow> rows) override
+    {
+        Reset();
+
+        auto convertedColumns = NColumnConverters::ConvertRowsToColumns(rows, ColumnSchemas_);
+        const auto convertedCount = std::ssize(convertedColumns);
+
+        std::vector<const TBatchColumn*> rootColumns;
+        rootColumns.reserve(convertedCount);
+        for (ssize_t index = 0; index < convertedCount; ++index) {
+            rootColumns.push_back(convertedColumns[index].RootColumn);
+        }
+
+        NumberOfRows_ = rows.size();
+        PrepareColumns(rootColumns);
+        Encode();
+    }
+
+    // Batch entry point: columnar batches are encoded directly,
+    // any other batch is materialized into rows and goes through DoWrite.
+    void DoWriteBatch(NTableClient::IUnversionedRowBatchPtr rowBatch) override
+    {
+        auto columnarBatch = rowBatch->TryAsColumnar();
+        if (columnarBatch) {
+            YT_LOG_DEBUG("Encoding columnar batch");
+            Reset();
+            NumberOfRows_ = rowBatch->GetRowCount();
+            PrepareColumns(columnarBatch->MaterializeColumns());
+            Encode();
+            return;
+        }
+
+        YT_LOG_DEBUG("Encoding non-columnar batch; running write rows");
+        DoWrite(rowBatch->MaterializeRows());
+    }
+
+    // Queues all messages for the prepared columns and writes them to the output stream.
+    // When a schema message is required it is queued first (preceded by an EOS marker
+    // unless this is the very first batch) and the remembered dictionaries are reset.
+    void Encode()
+    {
+        auto stream = GetOutputStream();
+
+        const bool schemaNeeded = IsSchemaMessageNeeded();
+        if (schemaNeeded && !IsFirstBatch_) {
+            RegisterEosMarker();
+        }
+        if (schemaNeeded) {
+            ResetArrowDictionaries();
+            PrepareSchema();
+        }
+        IsFirstBatch_ = false;
+
+        PrepareDictionaryBatches();
+        PrepareRecordBatch();
+
+        WritePayload(stream);
+        TryFlushBuffer(true);
+    }
+
+private:
+ bool IsFirstBatch_ = true;
+ size_t NumberOfRows_ = 0;
+ std::vector<TTypedBatchColumn> TypedColumns_;
+ std::vector<TColumnSchema> ColumnSchemas_;
+ std::vector<IUnversionedColumnarRowBatch::TDictionaryId> ArrowDictionaryIds_;
+
+ struct TMessage // One message queued for output by the writer.
+ {
+ std::optional<flatbuffers::FlatBufferBuilder> FlatbufBuilder; // Message metadata; std::nullopt denotes an EOS marker.
+ i64 BodySize; // Body size in bytes; zero when there is no body.
+ TBodyWriter BodyWriter; // Fills the body bytes; empty when there is no body.
+ };
+
+ std::vector<TMessage> Messages_;
+
+    // Returns true if |columnIndex| is a table/range/row/tablet index column
+    // whose corresponding control attribute is enabled in the config.
+    bool CheckIfSystemColumnEnable(int columnIndex)
+    {
+        if (ControlAttributesConfig_->EnableTableIndex && IsTableIndexColumnId(columnIndex)) {
+            return true;
+        }
+        if (ControlAttributesConfig_->EnableRangeIndex && IsRangeIndexColumnId(columnIndex)) {
+            return true;
+        }
+        if (ControlAttributesConfig_->EnableRowIndex && IsRowIndexColumnId(columnIndex)) {
+            return true;
+        }
+        return ControlAttributesConfig_->EnableTabletIndex && IsTabletIndexColumnId(columnIndex);
+    }
+
+    // Returns true unless the schema of column |columnIndex| has the Null simple logical type.
+    // |columnIndex| must be a valid index into ColumnSchemas_.
+    bool CheckIfTypeIsNotNull(int columnIndex)
+    {
+        YT_VERIFY(columnIndex >= 0 && columnIndex < std::ssize(ColumnSchemas_));
+        const auto& logicalType = ColumnSchemas_[columnIndex].LogicalType();
+        auto simpleType = CastToV1Type(logicalType).first;
+        return simpleType != ESimpleLogicalValueType::Null;
+    }
+
+    // Resolves the schema for name table entry |columnIndex|.
+    // Columns present in |tableSchema| keep their schema; enabled system columns
+    // become Int64; everything else is treated as a Null column.
+    TColumnSchema GetColumnSchema(NTableClient::TTableSchemaPtr& tableSchema, int columnIndex)
+    {
+        YT_VERIFY(columnIndex >= 0);
+        auto name = NameTable_->GetName(columnIndex);
+        auto columnSchema = tableSchema->FindColumn(name);
+        if (columnSchema) {
+            return *columnSchema;
+        }
+
+        if (IsSystemColumnId(columnIndex) && CheckIfSystemColumnEnable(columnIndex)) {
+            return TColumnSchema(TString(name), EValueType::Int64);
+        }
+        return TColumnSchema(TString(name), EValueType::Null);
+    }
+
+    // Appends every non-Null column of |batchColumns| to TypedColumns_,
+    // pairing it with the logical type taken from ColumnSchemas_.
+    void PrepareColumns(const TRange<const TBatchColumn*>& batchColumns)
+    {
+        TypedColumns_.reserve(batchColumns.Size());
+        for (const auto* batchColumn : batchColumns) {
+            if (!CheckIfTypeIsNotNull(batchColumn->Id)) {
+                continue;
+            }
+            YT_VERIFY(batchColumn->Id >= 0 && batchColumn->Id < std::ssize(ColumnSchemas_));
+            TypedColumns_.push_back(TTypedBatchColumn{
+                batchColumn,
+                ColumnSchemas_[batchColumn->Id].LogicalType()});
+        }
+    }
+
+    // A schema message is needed for the first batch and whenever some column
+    // switched between dictionary-encoded and non-dictionary-encoded form
+    // compared to the remembered dictionary ids.
+    bool IsSchemaMessageNeeded()
+    {
+        if (IsFirstBatch_) {
+            return true;
+        }
+        YT_VERIFY(ArrowDictionaryIds_.size() == TypedColumns_.size());
+
+        bool encodingChanged = false;
+        const auto columnCount = std::ssize(TypedColumns_);
+        for (int index = 0; index < columnCount; ++index) {
+            bool hasDictionaryNow = IsDictionaryEncodedColumn(*TypedColumns_[index].Column);
+            bool hadDictionaryBefore = ArrowDictionaryIds_[index] != IUnversionedColumnarRowBatch::NullDictionaryId;
+            if (hasDictionaryNow != hadDictionaryBefore) {
+                encodingChanged = true;
+            }
+        }
+        return encodingChanged;
+    }
+
+    // Forgets all remembered dictionaries: one NullDictionaryId slot per typed column.
+    void ResetArrowDictionaries()
+    {
+        const auto columnCount = TypedColumns_.size();
+        ArrowDictionaryIds_.assign(columnCount, IUnversionedColumnarRowBatch::NullDictionaryId);
+    }
+
+    // Queues an end-of-stream marker: a message with no metadata and no body.
+    void RegisterEosMarker()
+    {
+        YT_LOG_DEBUG("EOS marker registered");
+
+        TMessage eosMarker{
+            std::nullopt,
+            0,
+            TBodyWriter()};
+        Messages_.push_back(std::move(eosMarker));
+    }
+
+    // Queues a finished flatbuffer message together with its optional body writer.
+    // |bodySize| must be a multiple of ArrowAlignment; |type| is used for logging only.
+    void RegisterMessage(
+        [[maybe_unused]] org::apache::arrow::flatbuf::MessageHeader type,
+        flatbuffers::FlatBufferBuilder&& flatbufBuilder,
+        i64 bodySize = 0,
+        std::function<void(TMutableRef)> bodyWriter = nullptr)
+    {
+        YT_LOG_DEBUG("Message registered (Type: %v, MessageSize: %v, BodySize: %v)",
+            org::apache::arrow::flatbuf::EnumNamesMessageHeader()[type],
+            flatbufBuilder.GetSize(),
+            bodySize);
+
+        YT_VERIFY((bodySize % ArrowAlignment) == 0);
+
+        TMessage message{
+            std::move(flatbufBuilder),
+            bodySize,
+            std::move(bodyWriter)};
+        Messages_.push_back(std::move(message));
+    }
+
+    // Queues the Schema message: one Field per typed column, with a DictionaryEncoding
+    // (sequentially numbered ids, 32-bit index type) for dictionary-encoded columns.
+    void PrepareSchema()
+    {
+        flatbuffers::FlatBufferBuilder flatbufBuilder;
+
+        int nextArrowDictionaryId = 0;
+        std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> fields;
+        for (const auto& typedColumn : TypedColumns_) {
+            const auto columnId = typedColumn.Column->Id;
+            YT_VERIFY(columnId >= 0 && columnId < std::ssize(ColumnSchemas_));
+            auto columnSchema = ColumnSchemas_[columnId];
+
+            auto nameOffset = SerializeString(&flatbufBuilder, columnSchema.Name());
+            auto [typeType, typeOffset] = SerializeColumnType(&flatbufBuilder, columnSchema);
+
+            // The index type table is created for every column to keep the buffer layout stable.
+            flatbuffers::Offset<org::apache::arrow::flatbuf::DictionaryEncoding> encodingOffset;
+            auto indexTypeOffset = org::apache::arrow::flatbuf::CreateInt(flatbufBuilder, 32, false);
+
+            if (IsDictionaryEncodedColumn(*typedColumn.Column)) {
+                encodingOffset = org::apache::arrow::flatbuf::CreateDictionaryEncoding(
+                    flatbufBuilder,
+                    nextArrowDictionaryId++,
+                    indexTypeOffset);
+            }
+
+            fields.push_back(org::apache::arrow::flatbuf::CreateField(
+                flatbufBuilder,
+                nameOffset,
+                columnSchema.LogicalType()->IsNullable(),
+                typeType,
+                typeOffset,
+                encodingOffset));
+        }
+
+        auto fieldsOffset = flatbufBuilder.CreateVector(fields);
+
+        auto schemaOffset = org::apache::arrow::flatbuf::CreateSchema(
+            flatbufBuilder,
+            org::apache::arrow::flatbuf::Endianness_Little,
+            fieldsOffset);
+
+        auto messageOffset = org::apache::arrow::flatbuf::CreateMessage(
+            flatbufBuilder,
+            org::apache::arrow::flatbuf::MetadataVersion_V4,
+            org::apache::arrow::flatbuf::MessageHeader_Schema,
+            schemaOffset.Union(),
+            0);
+
+        flatbufBuilder.Finish(messageOffset);
+
+        RegisterMessage(
+            org::apache::arrow::flatbuf::MessageHeader_Schema,
+            std::move(flatbufBuilder));
+    }
+
+    // Queues dictionary batches for every column that needs one.
+    // Arrow dictionary ids are assigned sequentially in column order; a dictionary
+    // is re-sent only when its YT dictionary id differs from the remembered one.
+    void PrepareDictionaryBatches()
+    {
+        int nextArrowDictionaryId = 0;
+        auto emitDictionary = [&] (
+            int columnIndex,
+            IUnversionedColumnarRowBatch::TDictionaryId ytDictionaryId,
+            const TBatchColumn* dictionaryColumn)
+        {
+            int arrowDictionaryId = nextArrowDictionaryId++;
+            const auto& typedColumn = TypedColumns_[columnIndex];
+            auto previousYTDictionaryId = ArrowDictionaryIds_[columnIndex];
+
+            if (ytDictionaryId == previousYTDictionaryId) {
+                YT_LOG_DEBUG("Reusing previous dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
+                    typedColumn.Column->Id,
+                    ytDictionaryId,
+                    arrowDictionaryId);
+                return;
+            }
+
+            YT_LOG_DEBUG("Sending new dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
+                typedColumn.Column->Id,
+                ytDictionaryId,
+                arrowDictionaryId);
+            PrepareDictionaryBatch(
+                TTypedBatchColumn{dictionaryColumn, typedColumn.Type},
+                arrowDictionaryId);
+            ArrowDictionaryIds_[columnIndex] = ytDictionaryId;
+        };
+
+        const auto columnCount = std::ssize(TypedColumns_);
+        for (int columnIndex = 0; columnIndex < columnCount; ++columnIndex) {
+            const auto* column = TypedColumns_[columnIndex].Column;
+
+            if (column->Dictionary) {
+                YT_LOG_DEBUG("Adding dictionary batch for dictionary-encoded column (ColumnId: %v)",
+                    column->Id);
+                emitDictionary(
+                    columnIndex,
+                    column->Dictionary->DictionaryId,
+                    column->Dictionary->ValueColumn);
+                continue;
+            }
+
+            if (IsRleButNotDictionaryEncodedStringLikeColumn(*column)) {
+                YT_LOG_DEBUG("Adding dictionary batch for RLE but not dictionary-encoded string-like column (ColumnId: %v)",
+                    column->Id);
+                emitDictionary(
+                    columnIndex,
+                    IUnversionedColumnarRowBatch::GenerateDictionaryId(), // any unique one will do
+                    column->Rle->ValueColumn);
+                continue;
+            }
+
+            if (IsRleAndDictionaryEncodedColumn(*column)) {
+                YT_LOG_DEBUG("Adding dictionary batch for RLE and dictionary-encoded column (ColumnId: %v)",
+                    column->Id);
+                const auto& dictionary = column->Rle->ValueColumn->Dictionary;
+                emitDictionary(
+                    columnIndex,
+                    dictionary->DictionaryId,
+                    dictionary->ValueColumn);
+            }
+        }
+    }
+
+    // Queues a DictionaryBatch message that carries |typedColumn| as a one-column
+    // record batch under the given Arrow dictionary id.
+    void PrepareDictionaryBatch(
+        const TTypedBatchColumn& typedColumn,
+        int arrowDictionaryId)
+    {
+        namespace fb = org::apache::arrow::flatbuf;
+
+        flatbuffers::FlatBufferBuilder flatbufBuilder;
+
+        auto [batchOffset, batchBodySize, batchBodyWriter] = SerializeRecordBatch(
+            &flatbufBuilder,
+            typedColumn.Column->ValueCount,
+            MakeRange({typedColumn}));
+
+        auto dictionaryOffset = fb::CreateDictionaryBatch(
+            flatbufBuilder,
+            arrowDictionaryId,
+            batchOffset);
+
+        auto messageOffset = fb::CreateMessage(
+            flatbufBuilder,
+            fb::MetadataVersion_V4,
+            fb::MessageHeader_DictionaryBatch,
+            dictionaryOffset.Union(),
+            batchBodySize);
+
+        flatbufBuilder.Finish(messageOffset);
+
+        RegisterMessage(
+            fb::MessageHeader_DictionaryBatch,
+            std::move(flatbufBuilder),
+            batchBodySize,
+            std::move(batchBodyWriter));
+    }
+
+    // Queues the RecordBatch message covering all typed columns of the current batch.
+    void PrepareRecordBatch()
+    {
+        namespace fb = org::apache::arrow::flatbuf;
+
+        flatbuffers::FlatBufferBuilder flatbufBuilder;
+
+        auto [batchOffset, batchBodySize, batchBodyWriter] = SerializeRecordBatch(
+            &flatbufBuilder,
+            NumberOfRows_,
+            TypedColumns_);
+
+        auto messageOffset = fb::CreateMessage(
+            flatbufBuilder,
+            fb::MetadataVersion_V4,
+            fb::MessageHeader_RecordBatch,
+            batchOffset.Union(),
+            batchBodySize);
+
+        flatbufBuilder.Finish(messageOffset);
+
+        RegisterMessage(
+            fb::MessageHeader_RecordBatch,
+            std::move(flatbufBuilder),
+            batchBodySize,
+            std::move(batchBodyWriter));
+    }
+
+    // Computes the byte size of all queued messages: an 8-byte prefix per message
+    // plus, for messages with metadata, the aligned metadata and aligned body sizes.
+    i64 GetPayloadSize() const
+    {
+        i64 total = 0;
+        for (const auto& message : Messages_) {
+            // Continuation indicator followed by the metadata size.
+            total += sizeof(ui32);
+            total += sizeof(ui32);
+
+            if (!message.FlatbufBuilder) {
+                continue;
+            }
+
+            // Metadata message.
+            total += AlignUp<i64>(message.FlatbufBuilder->GetSize(), ArrowAlignment);
+            // Body.
+            total += AlignUp<i64>(message.BodySize, ArrowAlignment);
+        }
+        return total;
+    }
+
+    // Writes all queued messages to |output|.
+    // Each message is framed as: continuation indicator (0xFFFFFFFF), 32-bit metadata size
+    // (aligned to ArrowAlignment), the metadata flatbuffer zero-padded up to that size,
+    // and then the body, if any. An EOS marker is the continuation indicator followed
+    // by a zero metadata size.
+    void WritePayload(TBlobOutput* output)
+    {
+        YT_LOG_DEBUG("Started writing payload");
+        for (const auto& message : Messages_) {
+            // Continuation indicator
+            ui32 constMax = 0xFFFFFFFF;
+            output->Write(&constMax, sizeof(ui32));
+
+            if (message.FlatbufBuilder) {
+                auto metadataSize = message.FlatbufBuilder->GetSize();
+                auto metadataPtr = message.FlatbufBuilder->GetBufferPointer();
+
+                ui32 metadataSz = AlignUp<i64>(metadataSize, ArrowAlignment);
+
+                output->Write(&metadataSz, sizeof(ui32));
+                output->Write(metadataPtr, metadataSize);
+
+                // The announced metadata size is aligned, so the stream must carry
+                // the padding bytes as well; otherwise the body would start too early.
+                const char zeroByte = 0;
+                for (i64 written = metadataSize; written < static_cast<i64>(metadataSz); ++written) {
+                    output->Write(&zeroByte, sizeof(zeroByte));
+                }
+
+                // Body
+                if (message.BodyWriter) {
+                    TString current;
+                    current.resize(message.BodySize);
+                    // Double copying.
+                    message.BodyWriter(TMutableRef::FromString(current));
+                    output->Write(current.data(), message.BodySize);
+                } else {
+                    YT_VERIFY(message.BodySize == 0);
+                }
+            } else {
+                // EOS marker
+                ui32 zero = 0;
+                output->Write(&zero, sizeof(ui32));
+            }
+        }
+
+        YT_LOG_DEBUG("Finished writing payload");
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Factory for the Arrow writer: forwards all arguments to the TArrowWriter constructor.
+ISchemalessFormatWriterPtr CreateWriterForArrow(
+    NTableClient::TNameTablePtr nameTable,
+    const std::vector<NTableClient::TTableSchemaPtr>& schemas,
+    NConcurrency::IAsyncOutputStreamPtr output,
+    bool enableContextSaving,
+    TControlAttributesConfigPtr controlAttributesConfig,
+    int keyColumnCount)
+{
+    return New<TArrowWriter>(
+        std::move(nameTable),
+        schemas,
+        std::move(output),
+        enableContextSaving,
+        std::move(controlAttributesConfig),
+        keyColumnCount);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/arrow_writer.h b/yt/yt/library/formats/arrow_writer.h
new file mode 100644
index 0000000000..60b296f73b
--- /dev/null
+++ b/yt/yt/library/formats/arrow_writer.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <yt/yt/client/formats/public.h>
+
+#include <yt/yt/client/table_client/public.h>
+
+#include <yt/yt/core/concurrency/public.h>
+
+#include <yt/yt/core/ytree/public.h>
+
+
+namespace NYT::NFormats {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ISchemalessFormatWriterPtr CreateWriterForArrow(
+ NTableClient::TNameTablePtr nameTable,
+ const std::vector<NTableClient::TTableSchemaPtr>& schemas,
+ NConcurrency::IAsyncOutputStreamPtr output,
+ bool enableContextSaving,
+ TControlAttributesConfigPtr controlAttributesConfig,
+ int keyColumnCount);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NFormats
diff --git a/yt/yt/client/formats/dsv_parser.cpp b/yt/yt/library/formats/dsv_parser.cpp
index c2ccb2babf..bd9035176c 100644
--- a/yt/yt/client/formats/dsv_parser.cpp
+++ b/yt/yt/library/formats/dsv_parser.cpp
@@ -2,7 +2,8 @@
#include "format.h"
#include "escape.h"
-#include "parser.h"
+
+#include <yt/yt/client/formats/parser.h>
namespace NYT::NFormats {
diff --git a/yt/yt/client/formats/dsv_parser.h b/yt/yt/library/formats/dsv_parser.h
index 5a156d5db5..b93fc3a050 100644
--- a/yt/yt/client/formats/dsv_parser.h
+++ b/yt/yt/library/formats/dsv_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
#include <yt/yt/core/yson/consumer.h>
diff --git a/yt/yt/client/formats/dsv_writer.cpp b/yt/yt/library/formats/dsv_writer.cpp
index 934b82ed26..934b82ed26 100644
--- a/yt/yt/client/formats/dsv_writer.cpp
+++ b/yt/yt/library/formats/dsv_writer.cpp
diff --git a/yt/yt/client/formats/dsv_writer.h b/yt/yt/library/formats/dsv_writer.h
index 5d1c5de674..c17fcd3a6f 100644
--- a/yt/yt/client/formats/dsv_writer.h
+++ b/yt/yt/library/formats/dsv_writer.h
@@ -1,11 +1,12 @@
#pragma once
-#include "config.h"
#include "escape.h"
#include "helpers.h"
-#include "public.h"
#include "schemaless_writer_adapter.h"
+#include <yt/yt/client/formats/config.h>
+#include <yt/yt/client/formats/public.h>
+
#include <yt/yt/client/table_client/public.h>
#include <library/cpp/yt/misc/enum.h>
diff --git a/yt/yt/client/formats/escape.cpp b/yt/yt/library/formats/escape.cpp
index 50b1bf85e5..50b1bf85e5 100644
--- a/yt/yt/client/formats/escape.cpp
+++ b/yt/yt/library/formats/escape.cpp
diff --git a/yt/yt/client/formats/escape.h b/yt/yt/library/formats/escape.h
index 979ff2689d..4efc743944 100644
--- a/yt/yt/client/formats/escape.h
+++ b/yt/yt/library/formats/escape.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <string>
#include <vector>
diff --git a/yt/yt/library/formats/format.cpp b/yt/yt/library/formats/format.cpp
new file mode 100644
index 0000000000..1b7e0cf749
--- /dev/null
+++ b/yt/yt/library/formats/format.cpp
@@ -0,0 +1,598 @@
+#include "format.h"
+
+#include "dsv_parser.h"
+#include "dsv_writer.h"
+#include "protobuf_parser.h"
+#include "protobuf_writer.h"
+#include "schemaful_dsv_parser.h"
+#include "schemaful_dsv_writer.h"
+#include "schemaful_writer.h"
+#include "web_json_writer.h"
+#include "schemaless_writer_adapter.h"
+#include "skiff_parser.h"
+#include "skiff_writer.h"
+#include "versioned_writer.h"
+#include "yamred_dsv_parser.h"
+#include "yamred_dsv_writer.h"
+#include "yamr_parser.h"
+#include "yamr_writer.h"
+#include "yson_parser.h"
+
+#include <yt/yt/client/formats/parser.h>
+
+#include <yt/yt/client/table_client/name_table.h>
+#include <yt/yt/client/table_client/table_consumer.h>
+
+#include <yt/yt/library/skiff_ext/schema_match.h>
+
+#include <yt/yt/core/misc/error.h>
+
+#include <yt/yt/core/yson/writer.h>
+
+#include <yt/yt/core/ytree/fluent.h>
+
+#include <yt/yt/core/yson/forwarding_consumer.h>
+
+#include <yt/yt/core/json/json_parser.h>
+#include <yt/yt/core/json/json_writer.h>
+
+namespace NYT::NFormats {
+
+using namespace NConcurrency;
+using namespace NYTree;
+using namespace NYson;
+using namespace NJson;
+using namespace NTableClient;
+using namespace NSkiffExt;
+using namespace NComplexTypes;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+// Maps a data type to the YSON type used to represent it:
+// Structured -> Node, Tabular -> ListFragment. Any other data type is rejected.
+EYsonType DataTypeToYsonType(EDataType dataType)
+{
+    if (dataType == EDataType::Structured) {
+        return EYsonType::Node;
+    }
+    if (dataType == EDataType::Tabular) {
+        return EYsonType::ListFragment;
+    }
+    THROW_ERROR_EXCEPTION("Data type %Qlv is not supported by YSON",
+        dataType);
+}
+
+// Creates a YSON writer over |output| configured from |attributes|.
+// The last CreateYsonWriter argument is set exactly when the binary format is requested.
+std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForYson(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IZeroCopyOutput* output)
+{
+    auto config = ConvertTo<TYsonFormatConfigPtr>(&attributes);
+    auto ysonFormat = config->Format;
+    return CreateYsonWriter(
+        output,
+        ysonFormat,
+        DataTypeToYsonType(dataType),
+        ysonFormat == EYsonFormat::Binary);
+}
+
+// Creates a JSON consumer over |output| configured from |attributes|.
+std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForJson(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IOutputStream* output)
+{
+    auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
+    auto ysonType = DataTypeToYsonType(dataType);
+    return CreateJsonConsumer(output, ysonType, config);
+}
+
+// Creates a DSV consumer over |output|. Only structured data is supported here;
+// tabular, binary and null data types are rejected with an error.
+std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForDsv(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IOutputStream* output)
+{
+    auto config = ConvertTo<TDsvFormatConfigPtr>(&attributes);
+
+    if (dataType == EDataType::Structured) {
+        return std::unique_ptr<IFlushableYsonConsumer>(new TDsvNodeConsumer(output, config));
+    }
+
+    switch (dataType) {
+        case EDataType::Tabular:
+        case EDataType::Binary:
+        case EDataType::Null:
+            THROW_ERROR_EXCEPTION("Data type %Qlv is not supported by DSV",
+                dataType);
+
+        default:
+            YT_ABORT();
+    }
+}
+
+// Parser that feeds tabular input of the given format into value consumers:
+// a table consumer is created first and a format parser is then attached to it.
+class TTableParserAdapter
+    : public IParser
+{
+public:
+    TTableParserAdapter(
+        const TFormat& format,
+        std::vector<IValueConsumer*> valueConsumers,
+        int tableIndex)
+        : TableConsumer_(new TTableConsumer(
+            MakeConverterConfig(format),
+            valueConsumers,
+            tableIndex))
+        , Parser_(CreateParserForFormat(
+            format,
+            EDataType::Tabular,
+            TableConsumer_.get()))
+    { }
+
+    void Read(TStringBuf data) override
+    {
+        Parser_->Read(data);
+    }
+
+    void Finish() override
+    {
+        Parser_->Finish();
+    }
+
+private:
+    // Declared before Parser_ since the parser is constructed over the consumer.
+    const std::unique_ptr<IYsonConsumer> TableConsumer_;
+    const std::unique_ptr<IParser> Parser_;
+
+    // Reads the converter modes from the format attributes, falling back to the defaults.
+    static TYsonConverterConfig MakeConverterConfig(const TFormat& format)
+    {
+        return TYsonConverterConfig{
+            .ComplexTypeMode = format.Attributes().Get("complex_type_mode", EComplexTypeMode::Named),
+            .StringKeyedDictMode = format.Attributes().Get("string_keyed_dict_mode", EDictMode::Positional),
+            .DecimalMode = format.Attributes().Get("decimal_mode", EDecimalMode::Binary),
+            .TimeMode = format.Attributes().Get("time_mode", ETimeMode::Binary),
+            .UuidMode = format.Attributes().Get("uuid_mode", EUuidMode::Binary),
+        };
+    }
+};
+
+} // namespace
+
+// Creates an output consumer for the given format.
+// Only YSON, JSON and DSV are supported; other formats are rejected with an error.
+std::unique_ptr<IFlushableYsonConsumer> CreateConsumerForFormat(
+    const TFormat& format,
+    EDataType dataType,
+    IZeroCopyOutput* output)
+{
+    const auto formatType = format.GetType();
+
+    if (formatType == EFormatType::Yson) {
+        return CreateConsumerForYson(dataType, format.Attributes(), output);
+    }
+    if (formatType == EFormatType::Json) {
+        return CreateConsumerForJson(dataType, format.Attributes(), output);
+    }
+    if (formatType == EFormatType::Dsv) {
+        return CreateConsumerForDsv(dataType, format.Attributes(), output);
+    }
+
+    THROW_ERROR_EXCEPTION("Unsupported output format %Qlv",
+        format.GetType());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Creates a TConsumerAdapter writer that renders rows as a YSON list fragment.
+// The binary format gets a buffered binary writer; other formats use a plain YSON writer.
+template <class TWriter, class TConsumerAdapter>
+TIntrusivePtr<TWriter> CreateAdaptedWriterForYson(
+    const IAttributeDictionary& attributes,
+    TTableSchemaPtr schema,
+    IAsyncOutputStreamPtr output)
+{
+    using TConsumerPtr = std::unique_ptr<IFlushableYsonConsumer>;
+
+    auto config = ConvertTo<TYsonFormatConfigPtr>(&attributes);
+    auto consumerBuilder = [config] (IZeroCopyOutput* buffer) {
+        if (config->Format != EYsonFormat::Binary) {
+            return TConsumerPtr(new TYsonWriter(
+                buffer,
+                config->Format,
+                EYsonType::ListFragment));
+        }
+        return TConsumerPtr(new TBufferedBinaryYsonWriter(
+            buffer,
+            EYsonType::ListFragment,
+            true));
+    };
+
+    return New<TConsumerAdapter>(std::move(output), std::move(schema), std::move(consumerBuilder));
+}
+
+// Creates a TConsumerAdapter writer that renders rows as a JSON list fragment.
+// The JSON config is parsed from |attributes|.
+template <class TWriter, class TConsumerAdapter>
+TIntrusivePtr<TWriter> CreateAdaptedWriterForJson(
+    const IAttributeDictionary& attributes,
+    TTableSchemaPtr schema,
+    IAsyncOutputStreamPtr output)
+{
+    auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
+    // Capture the config by value: the consumer builder is handed over to the adapter
+    // and must not refer to a local that dies when this function returns.
+    return New<TConsumerAdapter>(std::move(output), std::move(schema), [=] (IOutputStream* buffer) {
+        return CreateJsonConsumer(buffer, EYsonType::ListFragment, config);
+    });
+}
+
+// Creates a schemaful rowset writer for the given format.
+// Supported formats: YSON, JSON, schemaful DSV and web JSON; anything else is an error.
+IUnversionedRowsetWriterPtr CreateSchemafulWriterForFormat(
+    const TFormat& format,
+    TTableSchemaPtr schema,
+    IAsyncOutputStreamPtr output)
+{
+    const auto formatType = format.GetType();
+
+    if (formatType == EFormatType::Yson) {
+        return CreateAdaptedWriterForYson<IUnversionedRowsetWriter, TSchemafulWriter>(format.Attributes(), std::move(schema), std::move(output));
+    }
+
+    if (formatType == EFormatType::Json) {
+        return CreateAdaptedWriterForJson<IUnversionedRowsetWriter, TSchemafulWriter>(format.Attributes(), std::move(schema), std::move(output));
+    }
+
+    if (formatType == EFormatType::SchemafulDsv) {
+        return CreateSchemafulWriterForSchemafulDsv(format.Attributes(), std::move(schema), std::move(output));
+    }
+
+    if (formatType == EFormatType::WebJson) {
+        auto webJsonFormatConfig = ConvertTo<TWebJsonFormatConfigPtr>(&format.Attributes());
+        // System columns are kept in the schemaful web JSON output.
+        webJsonFormatConfig->SkipSystemColumns = false;
+
+        return CreateWriterForWebJson(
+            std::move(webJsonFormatConfig),
+            TNameTable::FromSchema(*schema),
+            {schema},
+            std::move(output));
+    }
+
+    THROW_ERROR_EXCEPTION("Unsupported output format %Qlv",
+        format.GetType());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Creates a versioned writer for the given format; only YSON and JSON are supported.
+IVersionedWriterPtr CreateVersionedWriterForFormat(
+    const TFormat& format,
+    NTableClient::TTableSchemaPtr schema,
+    NConcurrency::IAsyncOutputStreamPtr output)
+{
+    const auto formatType = format.GetType();
+
+    if (formatType == EFormatType::Yson) {
+        return CreateAdaptedWriterForYson<IVersionedWriter, TVersionedWriter>(format.Attributes(), std::move(schema), std::move(output));
+    }
+
+    if (formatType == EFormatType::Json) {
+        return CreateAdaptedWriterForJson<IVersionedWriter, TVersionedWriter>(format.Attributes(), std::move(schema), std::move(output));
+    }
+
+    THROW_ERROR_EXCEPTION("Unsupported output format %Qlv", format.GetType());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Creates a schemaless writer for static tables.
+// Formats with a dedicated writer are dispatched to it; every other format
+// goes through the generic TSchemalessWriterAdapter.
+ISchemalessFormatWriterPtr CreateStaticTableWriterForFormat(
+    const TFormat& format,
+    TNameTablePtr nameTable,
+    const std::vector<TTableSchemaPtr>& tableSchemas,
+    NConcurrency::IAsyncOutputStreamPtr output,
+    bool enableContextSaving,
+    TControlAttributesConfigPtr controlAttributesConfig,
+    int keyColumnCount)
+{
+    switch (format.GetType()) {
+        case EFormatType::Dsv:
+            return CreateSchemalessWriterForDsv(
+                format.Attributes(),
+                nameTable,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        case EFormatType::Yamr:
+            return CreateSchemalessWriterForYamr(
+                format.Attributes(),
+                nameTable,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        case EFormatType::YamredDsv:
+            return CreateSchemalessWriterForYamredDsv(
+                format.Attributes(),
+                nameTable,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        case EFormatType::SchemafulDsv:
+            return CreateSchemalessWriterForSchemafulDsv(
+                format.Attributes(),
+                nameTable,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        case EFormatType::Protobuf:
+            return CreateWriterForProtobuf(
+                format.Attributes(),
+                tableSchemas,
+                nameTable,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        case EFormatType::WebJson:
+            return CreateWriterForWebJson(
+                format.Attributes(),
+                nameTable,
+                tableSchemas,
+                std::move(output));
+
+        case EFormatType::Skiff:
+            return CreateWriterForSkiff(
+                format.Attributes(),
+                nameTable,
+                tableSchemas,
+                std::move(output),
+                enableContextSaving,
+                controlAttributesConfig,
+                keyColumnCount);
+
+        default:
+            break;
+    }
+
+    // No dedicated writer: fall back to the generic adapter.
+    auto genericWriter = New<TSchemalessWriterAdapter>(
+        nameTable,
+        std::move(output),
+        enableContextSaving,
+        controlAttributesConfig,
+        keyColumnCount);
+    genericWriter->Init(tableSchemas, format);
+    return genericWriter;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Creates a producer that parses DSV from |input| into the consumer it is given.
+// Only tabular data is accepted.
+TYsonProducer CreateProducerForDsv(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IInputStream* input)
+{
+    const bool isTabular = dataType == EDataType::Tabular;
+    if (!isTabular) {
+        THROW_ERROR_EXCEPTION("DSV is supported only for tabular data");
+    }
+
+    auto config = ConvertTo<TDsvFormatConfigPtr>(&attributes);
+    return BIND([input, config] (IYsonConsumer* consumer) {
+        ParseDsv(input, consumer, config);
+    });
+}
+
+// Creates a producer that parses YAMR from |input| into the consumer it is given.
+// Only tabular data is accepted.
+TYsonProducer CreateProducerForYamr(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IInputStream* input)
+{
+    const bool isTabular = dataType == EDataType::Tabular;
+    if (!isTabular) {
+        THROW_ERROR_EXCEPTION("YAMR is supported only for tabular data");
+    }
+
+    auto config = ConvertTo<TYamrFormatConfigPtr>(&attributes);
+    return BIND([input, config] (IYsonConsumer* consumer) {
+        ParseYamr(input, consumer, config);
+    });
+}
+
+// Creates a producer that parses Yamred DSV from |input| into the consumer it is given.
+// Only tabular data is accepted.
+TYsonProducer CreateProducerForYamredDsv(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IInputStream* input)
+{
+    const bool isTabular = dataType == EDataType::Tabular;
+    if (!isTabular) {
+        THROW_ERROR_EXCEPTION("Yamred DSV is supported only for tabular data");
+    }
+
+    auto config = ConvertTo<TYamredDsvFormatConfigPtr>(&attributes);
+    return BIND([input, config] (IYsonConsumer* consumer) {
+        ParseYamredDsv(input, consumer, config);
+    });
+}
+
+// Creates a producer that parses schemaful DSV from |input| into the consumer it is given.
+// Only tabular data is accepted.
+TYsonProducer CreateProducerForSchemafulDsv(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IInputStream* input)
+{
+    const bool isTabular = dataType == EDataType::Tabular;
+    if (!isTabular) {
+        THROW_ERROR_EXCEPTION("Schemaful DSV is supported only for tabular data");
+    }
+
+    auto config = ConvertTo<TSchemafulDsvFormatConfigPtr>(&attributes);
+    return BIND([input, config] (IYsonConsumer* consumer) {
+        ParseSchemafulDsv(input, consumer, config);
+    });
+}
+
+// Creates a producer that parses JSON from |input| into the consumer it is given.
+// The YSON type is derived from |dataType| before the config is parsed.
+TYsonProducer CreateProducerForJson(
+    EDataType dataType,
+    const IAttributeDictionary& attributes,
+    IInputStream* input)
+{
+    auto ysonType = DataTypeToYsonType(dataType);
+    auto config = ConvertTo<TJsonFormatConfigPtr>(&attributes);
+
+    auto parse = [input, config, ysonType] (IYsonConsumer* consumer) {
+        ParseJson(input, consumer, config, ysonType);
+    };
+    return BIND(std::move(parse));
+}
+
+//! Creates a producer over a TYsonInput built from #input and the YSON type
+//! that DataTypeToYsonType yields for #dataType.
+TYsonProducer CreateProducerForYson(EDataType dataType, IInputStream* input)
+{
+    return ConvertToProducer(TYsonInput(input, DataTypeToYsonType(dataType)));
+}
+
+//! Dispatches to the format-specific producer factory.
+//! Handles Yson, Json, Dsv, Yamr, YamredDsv and SchemafulDsv; any other
+//! format type results in an "Unsupported input format" error.
+TYsonProducer CreateProducerForFormat(const TFormat& format, EDataType dataType, IInputStream* input)
+{
+    const auto formatType = format.GetType();
+
+    switch (formatType) {
+        case EFormatType::Yson:
+            return CreateProducerForYson(dataType, input);
+
+        case EFormatType::Json:
+            return CreateProducerForJson(dataType, format.Attributes(), input);
+
+        case EFormatType::Dsv:
+            return CreateProducerForDsv(dataType, format.Attributes(), input);
+
+        case EFormatType::Yamr:
+            return CreateProducerForYamr(dataType, format.Attributes(), input);
+
+        case EFormatType::YamredDsv:
+            return CreateProducerForYamredDsv(dataType, format.Attributes(), input);
+
+        case EFormatType::SchemafulDsv:
+            return CreateProducerForSchemafulDsv(dataType, format.Attributes(), input);
+
+        default:
+            break;
+    }
+
+    // Reached only for format types without a producer implementation.
+    THROW_ERROR_EXCEPTION("Unsupported input format %Qlv",
+        formatType);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Exposes a concrete parser #TBase through the IParser interface.
+//! Constructor arguments are perfectly forwarded to #TBase; Read and Finish
+//! delegate to the same-named #TBase methods.
+template <class TBase>
+struct TParserAdapter
+    : public TBase
+    , public IParser
+{
+    template <class... TCtorArgs>
+    TParserAdapter(TCtorArgs&&... ctorArgs)
+        : TBase(std::forward<TCtorArgs>(ctorArgs)...)
+    { }
+
+    //! Forwards a chunk of input to the wrapped parser.
+    void Read(TStringBuf data) override
+    {
+        TBase::Read(data);
+    }
+
+    //! Forwards the end-of-input notification to the wrapped parser.
+    void Finish() override
+    {
+        TBase::Finish();
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a push parser for #format that feeds parsed data into #consumer.
+/*!
+ *  Handles Yson, Json, Dsv, Yamr, YamredDsv and SchemafulDsv; any other format
+ *  type results in an "Unsupported input format" error. For every format except
+ *  Yson the format-specific config is deserialized from the format attributes.
+ *  #dataType is consulted for Yson and Json only (to pick the YSON type).
+ */
+std::unique_ptr<IParser> CreateParserForFormat(const TFormat& format, EDataType dataType, IYsonConsumer* consumer)
+{
+    switch (format.GetType()) {
+        case EFormatType::Yson:
+            return CreateParserForYson(consumer, DataTypeToYsonType(dataType));
+        case EFormatType::Json: {
+            auto config = ConvertTo<TJsonFormatConfigPtr>(&format.Attributes());
+            // TJsonParser does not implement IParser itself, hence the adapter.
+            // std::make_unique replaces the raw new, matching CreateParsersForFormat.
+            return std::make_unique<TParserAdapter<TJsonParser>>(
+                consumer,
+                config,
+                DataTypeToYsonType(dataType));
+        }
+        case EFormatType::Dsv: {
+            auto config = ConvertTo<TDsvFormatConfigPtr>(&format.Attributes());
+            return CreateParserForDsv(consumer, config);
+        }
+        case EFormatType::Yamr: {
+            auto config = ConvertTo<TYamrFormatConfigPtr>(&format.Attributes());
+            return CreateParserForYamr(consumer, config);
+        }
+        case EFormatType::YamredDsv: {
+            auto config = ConvertTo<TYamredDsvFormatConfigPtr>(&format.Attributes());
+            return CreateParserForYamredDsv(consumer, config);
+        }
+        case EFormatType::SchemafulDsv: {
+            auto config = ConvertTo<TSchemafulDsvFormatConfigPtr>(&format.Attributes());
+            return CreateParserForSchemafulDsv(consumer, config);
+        }
+        default:
+            THROW_ERROR_EXCEPTION("Unsupported input format %Qlv",
+                format.GetType());
+    }
+}
+
+//! Creates one parser per entry of #valueConsumers; the parser at position i
+//! is constructed with table index i.
+//! Protobuf and Skiff use their dedicated parsers; every other format type is
+//! served by TTableParserAdapter.
+std::vector<std::unique_ptr<IParser>> CreateParsersForFormat(
+    const TFormat& format,
+    const std::vector<IValueConsumer*>& valueConsumers)
+{
+    const auto consumerCount = std::ssize(valueConsumers);
+
+    std::vector<std::unique_ptr<IParser>> result;
+    result.reserve(consumerCount);
+
+    switch (format.GetType()) {
+        case EFormatType::Protobuf: {
+            auto protobufConfig = ConvertTo<TProtobufFormatConfigPtr>(&format.Attributes());
+            // TODO(max42): implementation of CreateParserForProtobuf clones config
+            // on each call, so this loop works in quadratic time. Fix that.
+            for (int index = 0; index < consumerCount; ++index) {
+                result.emplace_back(CreateParserForProtobuf(valueConsumers[index], protobufConfig, index));
+            }
+            break;
+        }
+
+        case EFormatType::Skiff: {
+            auto skiffConfig = ConvertTo<TSkiffFormatConfigPtr>(&format.Attributes());
+            // Schemas are parsed once and shared by all per-table parsers.
+            auto parsedSchemas = ParseSkiffSchemas(skiffConfig->SkiffSchemaRegistry, skiffConfig->TableSkiffSchemas);
+            for (int index = 0; index < consumerCount; ++index) {
+                result.emplace_back(CreateParserForSkiff(valueConsumers[index], parsedSchemas, skiffConfig, index));
+            }
+            break;
+        }
+
+        default: {
+            // The adapter receives the whole consumer list plus its own table index.
+            for (int index = 0; index < consumerCount; ++index) {
+                result.emplace_back(std::make_unique<TTableParserAdapter>(format, valueConsumers, index));
+            }
+            break;
+        }
+    }
+
+    return result;
+}
+
+//! Single-consumer convenience wrapper: wraps #valueConsumer into a one-element
+//! list, delegates to CreateParsersForFormat and returns the first parser.
+std::unique_ptr<IParser> CreateParserForFormat(
+    const TFormat& format,
+    IValueConsumer* valueConsumer)
+{
+    const std::vector<IValueConsumer*> singleConsumer{valueConsumer};
+    auto createdParsers = CreateParsersForFormat(format, singleConsumer);
+    return std::move(createdParsers[0]);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Sets up #escapeTable for schemaful DSV.
+//! Stops are the record and field separators of #config; with escaping enabled
+//! the escaping symbol is stored in the table and added to the stops as well.
+void ConfigureEscapeTable(const TSchemafulDsvFormatConfigPtr& config, TEscapeTable* escapeTable)
+{
+    std::vector<char> stops;
+    stops.push_back(config->RecordSeparator);
+    stops.push_back(config->FieldSeparator);
+
+    if (config->EnableEscaping) {
+        escapeTable->EscapingSymbol = config->EscapingSymbol;
+        stops.push_back(config->EscapingSymbol);
+    }
+
+    escapeTable->FillStops(stops);
+}
+
+//! Sets up the key and value escape tables for the DSV family.
+//! Value stops: record separator, field separator, '\0', then the escaping
+//! symbol (if escaping is enabled) and '\r' (if #addCarriageReturn).
+//! Key stops: the value stops followed by the key-value separator.
+void ConfigureEscapeTables(
+    const TDsvFormatConfigBasePtr& config,
+    bool addCarriageReturn,
+    TEscapeTable* keyEscapeTable,
+    TEscapeTable* valueEscapeTable)
+{
+    std::vector<char> valueStops = {config->RecordSeparator, config->FieldSeparator, '\0'};
+
+    if (config->EnableEscaping) {
+        valueStops.push_back(config->EscapingSymbol);
+        valueEscapeTable->EscapingSymbol = config->EscapingSymbol;
+        keyEscapeTable->EscapingSymbol = valueEscapeTable->EscapingSymbol;
+    }
+
+    if (addCarriageReturn) {
+        valueStops.push_back('\r');
+    }
+
+    valueEscapeTable->FillStops(valueStops);
+
+    // Keys must additionally stop at the key-value separator.
+    auto keyStops = valueStops;
+    keyStops.push_back(config->KeyValueSeparator);
+    keyEscapeTable->FillStops(keyStops);
+}
+
+//! Sets up the key and value escape tables for the YAMR family.
+//! Key stops start with the record and field separators, value stops with the
+//! record separator only. For each side whose escaping is enabled, '\0' and
+//! '\r' are appended when #escapingForWriter is set, followed by the escaping
+//! symbol, which is also stored in the corresponding table.
+void ConfigureEscapeTables(
+    const TYamrFormatConfigBasePtr& config,
+    bool enableKeyEscaping,
+    bool enableValueEscaping,
+    bool escapingForWriter,
+    TEscapeTable* keyEscapeTable,
+    TEscapeTable* valueEscapeTable)
+{
+    // Shared per-side logic; the append order is significant and kept as is.
+    auto enableEscaping = [&] (std::vector<char>* stops, TEscapeTable* table) {
+        if (escapingForWriter) {
+            stops->push_back('\0');
+            stops->push_back('\r');
+        }
+        stops->push_back(config->EscapingSymbol);
+        table->EscapingSymbol = config->EscapingSymbol;
+    };
+
+    std::vector<char> valueStops = {config->RecordSeparator};
+    std::vector<char> keyStops = {config->RecordSeparator, config->FieldSeparator};
+
+    if (enableKeyEscaping) {
+        enableEscaping(&keyStops, keyEscapeTable);
+    }
+    if (enableValueEscaping) {
+        enableEscaping(&valueStops, valueEscapeTable);
+    }
+
+    keyEscapeTable->FillStops(keyStops);
+    valueEscapeTable->FillStops(valueStops);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NFormats
diff --git a/yt/yt/library/formats/format.h b/yt/yt/library/formats/format.h
new file mode 100644
index 0000000000..3a85d7f1a4
--- /dev/null
+++ b/yt/yt/library/formats/format.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/format.h>
+
+#include <yt/yt/client/table_client/public.h>
+#include <yt/yt/client/table_client/unversioned_writer.h>
+
+#include <yt/yt/core/concurrency/public.h>
+
+#include <yt/yt/core/misc/property.h>
+
+#include <yt/yt/core/yson/public.h>
+
+#include <yt/yt/core/ytree/attributes.h>
+
+namespace NYT::NFormats {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Format writer interface layered on top of NTableClient::IUnversionedRowsetWriter.
+struct ISchemalessFormatWriter
+ : public NTableClient::IUnversionedRowsetWriter
+{
+ //! Returns the writer's context as a blob.
+ //! NOTE(review): presumably tied to the enableContextSaving writer option -- confirm.
+ virtual TBlob GetContext() const = 0;
+
+ //! Returns the written size as tracked by the implementation
+ //! (presumably in bytes -- TODO confirm).
+ virtual i64 GetWrittenSize() const = 0;
+
+ //! Flushes the writer asynchronously; the returned future must not be
+ //! discarded (see [[nodiscard]]).
+ [[nodiscard]] virtual TFuture<void> Flush() = 0;
+
+ //! Writes a whole row batch.
+ //! NOTE(review): the meaning of the bool result is not visible here;
+ //! presumably it follows the contract of the base writer -- confirm.
+ virtual bool WriteBatch(NTableClient::IUnversionedRowBatchPtr rowBatch) = 0;
+};
+
+DEFINE_REFCOUNTED_TYPE(ISchemalessFormatWriter)
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Historically, this function creates the format writer used for reading
+// dynamic tables. Its format slightly differs from the one for static tables. :(
+NTableClient::IUnversionedRowsetWriterPtr CreateSchemafulWriterForFormat(
+    const TFormat& format,
+    NTableClient::TTableSchemaPtr schema,
+    NConcurrency::IAsyncOutputStreamPtr output);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a versioned writer that emits rows of #schema in #format to #output.
+//! NOTE(review): the set of supported formats is defined by the implementation,
+//! which is not visible from this header -- confirm before relying on it.
+NTableClient::IVersionedWriterPtr CreateVersionedWriterForFormat(
+    const TFormat& format,
+    NTableClient::TTableSchemaPtr schema,
+    NConcurrency::IAsyncOutputStreamPtr output);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a schemaless format writer for #format writing to #output.
+//! NOTE(review): judging by the name this is the static-table counterpart of
+//! CreateSchemafulWriterForFormat; the exact role of #tableSchemas,
+//! #enableContextSaving and #keyColumnCount is format-specific -- confirm
+//! against the implementation.
+ISchemalessFormatWriterPtr CreateStaticTableWriterForFormat(
+ const TFormat& format,
+ NTableClient::TNameTablePtr nameTable,
+ const std::vector<NTableClient::TTableSchemaPtr>& tableSchemas,
+ NConcurrency::IAsyncOutputStreamPtr output,
+ bool enableContextSaving,
+ TControlAttributesConfigPtr controlAttributesConfig,
+ int keyColumnCount);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Creates a flushable YSON consumer for #format and #dataType.
+//! NOTE(review): presumably the consumer serializes into #output -- confirm
+//! against the implementation.
+std::unique_ptr<NYson::IFlushableYsonConsumer> CreateConsumerForFormat(
+ const TFormat& format,
+ EDataType dataType,
+ IZeroCopyOutput* output);
+
+//! Creates a YSON producer that reads #input in #format.
+//! Supported format types: Yson, Json, Dsv, Yamr, YamredDsv, SchemafulDsv;
+//! other types yield an error. The DSV and YAMR variants accept tabular
+//! #dataType only.
+NYson::TYsonProducer CreateProducerForFormat(
+ const TFormat& format,
+ EDataType dataType,
+ IInputStream* input);
+
+//! Creates a push parser for #format that feeds #consumer.
+//! Supported format types: Yson, Json, Dsv, Yamr, YamredDsv, SchemafulDsv;
+//! other types yield an error.
+std::unique_ptr<IParser> CreateParserForFormat(
+ const TFormat& format,
+ EDataType dataType,
+ NYson::IYsonConsumer* consumer);
+
+//! Create own parser for each value consumer.
+//! The i-th parser is created with table index i.
+std::vector<std::unique_ptr<IParser>> CreateParsersForFormat(
+ const TFormat& format,
+ const std::vector<NTableClient::IValueConsumer*>& valueConsumers);
+
+//! Create parser for value consumer. Helper for previous method in singular case.
+std::unique_ptr<IParser> CreateParserForFormat(
+ const TFormat& format,
+ NTableClient::IValueConsumer* valueConsumer);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Configures #escapeTable for schemaful DSV: stops are the record and field
+//! separators plus, when escaping is enabled, the escaping symbol (which is
+//! also stored in the table).
+void ConfigureEscapeTable(const TSchemafulDsvFormatConfigPtr& config, TEscapeTable* escapeTable);
+
+//! Configures key and value escape tables for the DSV family.
+//! Both tables stop at the record separator, field separator and '\0', at the
+//! escaping symbol when escaping is enabled, and at '\r' when
+//! #addCarriageReturn is set; the key table additionally stops at the
+//! key-value separator.
+void ConfigureEscapeTables(
+ const TDsvFormatConfigBasePtr& config,
+ bool addCarriageReturn,
+ TEscapeTable* keyEscapeTable,
+ TEscapeTable* valueEscapeTable);
+
+//! Configures key and value escape tables for the YAMR family.
+//! Keys stop at the record and field separators, values at the record
+//! separator only. For each side with escaping enabled the escaping symbol is
+//! added (and stored in the table); with #escapingForWriter, '\0' and '\r'
+//! are added for that side as well.
+void ConfigureEscapeTables(
+ const TYamrFormatConfigBasePtr& config,
+ bool enableKeyEscaping,
+ bool enableValueEscaping,
+ bool escapingForWriter,
+ TEscapeTable* keyEscapeTable,
+ TEscapeTable* valueEscapeTable);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NFormats
diff --git a/yt/yt/client/formats/helpers.cpp b/yt/yt/library/formats/helpers.cpp
index 9609b447fb..9609b447fb 100644
--- a/yt/yt/client/formats/helpers.cpp
+++ b/yt/yt/library/formats/helpers.cpp
diff --git a/yt/yt/client/formats/helpers.h b/yt/yt/library/formats/helpers.h
index 526a95db0f..1d73d9279f 100644
--- a/yt/yt/client/formats/helpers.h
+++ b/yt/yt/library/formats/helpers.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/lenval_control_constants.h b/yt/yt/library/formats/lenval_control_constants.h
index ccbbc71fe4..ccbbc71fe4 100644
--- a/yt/yt/client/formats/lenval_control_constants.h
+++ b/yt/yt/library/formats/lenval_control_constants.h
diff --git a/yt/yt/client/formats/private.h b/yt/yt/library/formats/private.h
index 805168dd30..805168dd30 100644
--- a/yt/yt/client/formats/private.h
+++ b/yt/yt/library/formats/private.h
diff --git a/yt/yt/client/formats/protobuf.cpp b/yt/yt/library/formats/protobuf.cpp
index e4c29652fa..e4c29652fa 100644
--- a/yt/yt/client/formats/protobuf.cpp
+++ b/yt/yt/library/formats/protobuf.cpp
diff --git a/yt/yt/client/formats/protobuf.h b/yt/yt/library/formats/protobuf.h
index accf1e8b71..7f0b3e38f6 100644
--- a/yt/yt/client/formats/protobuf.h
+++ b/yt/yt/library/formats/protobuf.h
@@ -1,6 +1,7 @@
#pragma once
-#include "config.h"
+#include <yt/yt/client/formats/config.h>
+
#include "private.h"
#include <google/protobuf/descriptor.h>
diff --git a/yt/yt/client/formats/protobuf_options.cpp b/yt/yt/library/formats/protobuf_options.cpp
index 5e9543d844..5e9543d844 100644
--- a/yt/yt/client/formats/protobuf_options.cpp
+++ b/yt/yt/library/formats/protobuf_options.cpp
diff --git a/yt/yt/client/formats/protobuf_options.h b/yt/yt/library/formats/protobuf_options.h
index 03caaadaff..7bd51bd54f 100644
--- a/yt/yt/client/formats/protobuf_options.h
+++ b/yt/yt/library/formats/protobuf_options.h
@@ -1,6 +1,6 @@
#pragma once
-#include "config.h"
+#include <yt/yt/client/formats/config.h>
#include <yt/yt_proto/yt/formats/extension.pb.h>
diff --git a/yt/yt/client/formats/protobuf_parser.cpp b/yt/yt/library/formats/protobuf_parser.cpp
index abaef0cb22..925dabd9ff 100644
--- a/yt/yt/client/formats/protobuf_parser.cpp
+++ b/yt/yt/library/formats/protobuf_parser.cpp
@@ -1,7 +1,9 @@
#include "protobuf_parser.h"
#include "protobuf.h"
-#include "parser.h"
+
+#include <yt/yt/client/formats/parser.h>
+
#include "yson_map_to_unversioned_value.h"
#include <yt/yt/client/table_client/helpers.h>
diff --git a/yt/yt/client/formats/protobuf_parser.h b/yt/yt/library/formats/protobuf_parser.h
index 1ac356069f..14f32192b1 100644
--- a/yt/yt/client/formats/protobuf_parser.h
+++ b/yt/yt/library/formats/protobuf_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
namespace NYT::NFormats {
diff --git a/yt/yt/client/formats/protobuf_writer.cpp b/yt/yt/library/formats/protobuf_writer.cpp
index f4321cd68a..f4321cd68a 100644
--- a/yt/yt/client/formats/protobuf_writer.cpp
+++ b/yt/yt/library/formats/protobuf_writer.cpp
diff --git a/yt/yt/client/formats/protobuf_writer.h b/yt/yt/library/formats/protobuf_writer.h
index a6f7936405..d726f92e4c 100644
--- a/yt/yt/client/formats/protobuf_writer.h
+++ b/yt/yt/library/formats/protobuf_writer.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/schemaful_dsv_parser.cpp b/yt/yt/library/formats/schemaful_dsv_parser.cpp
index 8fb0bda433..3149f28851 100644
--- a/yt/yt/client/formats/schemaful_dsv_parser.cpp
+++ b/yt/yt/library/formats/schemaful_dsv_parser.cpp
@@ -1,9 +1,10 @@
#include "schemaful_dsv_parser.h"
-#include "parser.h"
#include "escape.h"
#include "format.h"
+#include <yt/yt/client/formats/parser.h>
+
#include <yt/yt/client/table_client/public.h>
namespace NYT::NFormats {
diff --git a/yt/yt/client/formats/schemaful_dsv_parser.h b/yt/yt/library/formats/schemaful_dsv_parser.h
index cc01a9b399..164b51ebb2 100644
--- a/yt/yt/client/formats/schemaful_dsv_parser.h
+++ b/yt/yt/library/formats/schemaful_dsv_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
#include <yt/yt/core/yson/consumer.h>
diff --git a/yt/yt/client/formats/schemaful_dsv_writer.cpp b/yt/yt/library/formats/schemaful_dsv_writer.cpp
index 17b9210ff9..17b9210ff9 100644
--- a/yt/yt/client/formats/schemaful_dsv_writer.cpp
+++ b/yt/yt/library/formats/schemaful_dsv_writer.cpp
diff --git a/yt/yt/client/formats/schemaful_dsv_writer.h b/yt/yt/library/formats/schemaful_dsv_writer.h
index c420f9e7ea..a4c990b0a4 100644
--- a/yt/yt/client/formats/schemaful_dsv_writer.h
+++ b/yt/yt/library/formats/schemaful_dsv_writer.h
@@ -1,7 +1,8 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "helpers.h"
#include "schemaless_writer_adapter.h"
diff --git a/yt/yt/client/formats/schemaful_writer.cpp b/yt/yt/library/formats/schemaful_writer.cpp
index c7f72d5544..c7f72d5544 100644
--- a/yt/yt/client/formats/schemaful_writer.cpp
+++ b/yt/yt/library/formats/schemaful_writer.cpp
diff --git a/yt/yt/client/formats/schemaful_writer.h b/yt/yt/library/formats/schemaful_writer.h
index 2d4848431f..0a627be388 100644
--- a/yt/yt/client/formats/schemaful_writer.h
+++ b/yt/yt/library/formats/schemaful_writer.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/complex_types/yson_format_conversion.h>
diff --git a/yt/yt/client/formats/schemaless_writer_adapter.cpp b/yt/yt/library/formats/schemaless_writer_adapter.cpp
index 8970177132..68c95b1b2d 100644
--- a/yt/yt/client/formats/schemaless_writer_adapter.cpp
+++ b/yt/yt/library/formats/schemaless_writer_adapter.cpp
@@ -1,5 +1,6 @@
#include "schemaless_writer_adapter.h"
-#include "config.h"
+
+#include <yt/yt/client/formats/config.h>
#include <yt/yt/client/table_client/name_table.h>
#include <yt/yt/client/table_client/row_batch.h>
diff --git a/yt/yt/client/formats/schemaless_writer_adapter.h b/yt/yt/library/formats/schemaless_writer_adapter.h
index 52c85c7ffa..4055d1968f 100644
--- a/yt/yt/client/formats/schemaless_writer_adapter.h
+++ b/yt/yt/library/formats/schemaless_writer_adapter.h
@@ -1,10 +1,11 @@
#pragma once
-#include "public.h"
#include "format.h"
#include "helpers.h"
#include "unversioned_value_yson_writer.h"
+#include <yt/yt/client/formats/public.h>
+
#include <yt/yt/client/table_client/unversioned_writer.h>
#include <yt/yt/core/concurrency/public.h>
diff --git a/yt/yt/client/formats/skiff_parser.cpp b/yt/yt/library/formats/skiff_parser.cpp
index 8b2d71238b..77d887e9ce 100644
--- a/yt/yt/client/formats/skiff_parser.cpp
+++ b/yt/yt/library/formats/skiff_parser.cpp
@@ -2,7 +2,9 @@
#include "skiff_yson_converter.h"
#include "helpers.h"
-#include "parser.h"
+
+#include <yt/yt/client/formats/parser.h>
+
#include "yson_map_to_unversioned_value.h"
#include <yt/yt/library/decimal/decimal.h>
diff --git a/yt/yt/client/formats/skiff_parser.h b/yt/yt/library/formats/skiff_parser.h
index 35cea6666e..7321054511 100644
--- a/yt/yt/client/formats/skiff_parser.h
+++ b/yt/yt/library/formats/skiff_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
#include <library/cpp/skiff/skiff.h>
diff --git a/yt/yt/client/formats/skiff_writer.cpp b/yt/yt/library/formats/skiff_writer.cpp
index 1eaf09ce8d..4d84770ccc 100644
--- a/yt/yt/client/formats/skiff_writer.cpp
+++ b/yt/yt/library/formats/skiff_writer.cpp
@@ -1,9 +1,10 @@
#include "skiff_writer.h"
-#include "public.h"
#include "schemaless_writer_adapter.h"
#include "skiff_yson_converter.h"
+#include <yt/yt/client/formats/public.h>
+
#include <yt/yt/client/table_client/name_table.h>
#include <yt/yt/client/table_client/logical_type.h>
#include <yt/yt/client/table_client/schema.h>
diff --git a/yt/yt/client/formats/skiff_writer.h b/yt/yt/library/formats/skiff_writer.h
index 9cd8f66268..0e66a54156 100644
--- a/yt/yt/client/formats/skiff_writer.h
+++ b/yt/yt/library/formats/skiff_writer.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/skiff_yson_converter-inl.h b/yt/yt/library/formats/skiff_yson_converter-inl.h
index 2c667b35d6..2c667b35d6 100644
--- a/yt/yt/client/formats/skiff_yson_converter-inl.h
+++ b/yt/yt/library/formats/skiff_yson_converter-inl.h
diff --git a/yt/yt/client/formats/skiff_yson_converter.cpp b/yt/yt/library/formats/skiff_yson_converter.cpp
index 171bfd9a9a..171bfd9a9a 100644
--- a/yt/yt/client/formats/skiff_yson_converter.cpp
+++ b/yt/yt/library/formats/skiff_yson_converter.cpp
diff --git a/yt/yt/client/formats/skiff_yson_converter.h b/yt/yt/library/formats/skiff_yson_converter.h
index 233b106729..233b106729 100644
--- a/yt/yt/client/formats/skiff_yson_converter.h
+++ b/yt/yt/library/formats/skiff_yson_converter.h
diff --git a/yt/yt/client/formats/unversioned_value_yson_writer.cpp b/yt/yt/library/formats/unversioned_value_yson_writer.cpp
index c89dd48373..c89dd48373 100644
--- a/yt/yt/client/formats/unversioned_value_yson_writer.cpp
+++ b/yt/yt/library/formats/unversioned_value_yson_writer.cpp
diff --git a/yt/yt/client/formats/unversioned_value_yson_writer.h b/yt/yt/library/formats/unversioned_value_yson_writer.h
index 1b6f671298..7799f3c3a3 100644
--- a/yt/yt/client/formats/unversioned_value_yson_writer.h
+++ b/yt/yt/library/formats/unversioned_value_yson_writer.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
#include <yt/yt/client/complex_types/yson_format_conversion.h>
@@ -25,4 +25,4 @@ private:
////////////////////////////////////////////////////////////////////////////////
-} // namespace NYT::NFormats \ No newline at end of file
+} // namespace NYT::NFormats
diff --git a/yt/yt/client/formats/versioned_writer.cpp b/yt/yt/library/formats/versioned_writer.cpp
index 056c86d5e1..056c86d5e1 100644
--- a/yt/yt/client/formats/versioned_writer.cpp
+++ b/yt/yt/library/formats/versioned_writer.cpp
diff --git a/yt/yt/client/formats/versioned_writer.h b/yt/yt/library/formats/versioned_writer.h
index 215f4e6537..025ba8e1fe 100644
--- a/yt/yt/client/formats/versioned_writer.h
+++ b/yt/yt/library/formats/versioned_writer.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/versioned_writer.h>
#include <yt/yt/client/table_client/schema.h>
diff --git a/yt/yt/client/formats/web_json_writer.cpp b/yt/yt/library/formats/web_json_writer.cpp
index d4c7293c85..9233f55c6f 100644
--- a/yt/yt/client/formats/web_json_writer.cpp
+++ b/yt/yt/library/formats/web_json_writer.cpp
@@ -1,8 +1,9 @@
#include "web_json_writer.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "format.h"
-#include "public.h"
#include "schemaless_writer_adapter.h"
#include "yql_yson_converter.h"
diff --git a/yt/yt/client/formats/web_json_writer.h b/yt/yt/library/formats/web_json_writer.h
index 25d476a98f..a2ca099260 100644
--- a/yt/yt/client/formats/web_json_writer.h
+++ b/yt/yt/library/formats/web_json_writer.h
@@ -1,7 +1,8 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "helpers.h"
#include "schemaless_writer_adapter.h"
diff --git a/yt/yt/library/formats/ya.make b/yt/yt/library/formats/ya.make
new file mode 100644
index 0000000000..72e875b867
--- /dev/null
+++ b/yt/yt/library/formats/ya.make
@@ -0,0 +1,48 @@
+# Format parsers and writers (DSV, YAMR, schemaful DSV, protobuf, skiff,
+# web JSON, arrow, YSON), living in yt/yt/library/formats.
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ arrow_writer.cpp
+ dsv_parser.cpp
+ dsv_writer.cpp
+ escape.cpp
+ format.cpp
+ helpers.cpp
+ protobuf.cpp
+ protobuf_options.cpp
+ protobuf_parser.cpp
+ protobuf_writer.cpp
+ schemaful_dsv_parser.cpp
+ schemaful_dsv_writer.cpp
+ schemaful_writer.cpp
+ schemaless_writer_adapter.cpp
+ skiff_parser.cpp
+ skiff_writer.cpp
+ skiff_yson_converter.cpp
+ unversioned_value_yson_writer.cpp
+ versioned_writer.cpp
+ web_json_writer.cpp
+ yamred_dsv_parser.cpp
+ yamred_dsv_writer.cpp
+ yamr_parser_base.cpp
+ yamr_parser.cpp
+ yamr_writer_base.cpp
+ yamr_writer.cpp
+ yql_yson_converter.cpp
+ yson_map_to_unversioned_value.cpp
+ yson_parser.cpp
+)
+
+# yt/yt/client/formats provides the public.h / config.h / parser.h / format.h
+# headers that the sources here include by full path.
+PEERDIR(
+ yt/yt/client
+ yt/yt/client/formats
+ yt/yt/client/arrow/fbs
+ yt/yt/library/column_converters
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ unittests
+)
diff --git a/yt/yt/client/formats/yamr_parser.cpp b/yt/yt/library/formats/yamr_parser.cpp
index 0465741743..0465741743 100644
--- a/yt/yt/client/formats/yamr_parser.cpp
+++ b/yt/yt/library/formats/yamr_parser.cpp
diff --git a/yt/yt/client/formats/yamr_parser.h b/yt/yt/library/formats/yamr_parser.h
index 10b5185d8e..3e7791a930 100644
--- a/yt/yt/client/formats/yamr_parser.h
+++ b/yt/yt/library/formats/yamr_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
namespace NYT::NFormats {
diff --git a/yt/yt/client/formats/yamr_parser_base.cpp b/yt/yt/library/formats/yamr_parser_base.cpp
index 3a7a7d8833..935faaed80 100644
--- a/yt/yt/client/formats/yamr_parser_base.cpp
+++ b/yt/yt/library/formats/yamr_parser_base.cpp
@@ -1,7 +1,8 @@
#include "yamr_parser_base.h"
#include "format.h"
-#include "config.h"
+
+#include <yt/yt/client/formats/config.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/yamr_parser_base.h b/yt/yt/library/formats/yamr_parser_base.h
index 240a2855a8..56968d4908 100644
--- a/yt/yt/client/formats/yamr_parser_base.h
+++ b/yt/yt/library/formats/yamr_parser_base.h
@@ -1,6 +1,6 @@
#pragma once
-#include "parser.h"
+#include <yt/yt/client/formats/parser.h>
#include "escape.h"
diff --git a/yt/yt/client/formats/yamr_writer.cpp b/yt/yt/library/formats/yamr_writer.cpp
index 4408b35568..4408b35568 100644
--- a/yt/yt/client/formats/yamr_writer.cpp
+++ b/yt/yt/library/formats/yamr_writer.cpp
diff --git a/yt/yt/client/formats/yamr_writer.h b/yt/yt/library/formats/yamr_writer.h
index d381b1d4bd..68b89efb26 100644
--- a/yt/yt/client/formats/yamr_writer.h
+++ b/yt/yt/library/formats/yamr_writer.h
@@ -1,7 +1,8 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "helpers.h"
#include "yamr_writer_base.h"
diff --git a/yt/yt/client/formats/yamr_writer_base.cpp b/yt/yt/library/formats/yamr_writer_base.cpp
index 1dc73de0d8..1dc73de0d8 100644
--- a/yt/yt/client/formats/yamr_writer_base.cpp
+++ b/yt/yt/library/formats/yamr_writer_base.cpp
diff --git a/yt/yt/client/formats/yamr_writer_base.h b/yt/yt/library/formats/yamr_writer_base.h
index d8483f6636..a8f0583be1 100644
--- a/yt/yt/client/formats/yamr_writer_base.h
+++ b/yt/yt/library/formats/yamr_writer_base.h
@@ -1,7 +1,8 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "helpers.h"
#include "escape.h"
#include "schemaless_writer_adapter.h"
diff --git a/yt/yt/client/formats/yamred_dsv_parser.cpp b/yt/yt/library/formats/yamred_dsv_parser.cpp
index 476e760ea6..476e760ea6 100644
--- a/yt/yt/client/formats/yamred_dsv_parser.cpp
+++ b/yt/yt/library/formats/yamred_dsv_parser.cpp
diff --git a/yt/yt/client/formats/yamred_dsv_parser.h b/yt/yt/library/formats/yamred_dsv_parser.h
index e260ee6b30..6a214aa674 100644
--- a/yt/yt/client/formats/yamred_dsv_parser.h
+++ b/yt/yt/library/formats/yamred_dsv_parser.h
@@ -1,7 +1,7 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
namespace NYT::NFormats {
diff --git a/yt/yt/client/formats/yamred_dsv_writer.cpp b/yt/yt/library/formats/yamred_dsv_writer.cpp
index 956e771732..956e771732 100644
--- a/yt/yt/client/formats/yamred_dsv_writer.cpp
+++ b/yt/yt/library/formats/yamred_dsv_writer.cpp
diff --git a/yt/yt/client/formats/yamred_dsv_writer.h b/yt/yt/library/formats/yamred_dsv_writer.h
index 6d9050abe3..6d1f4071be 100644
--- a/yt/yt/client/formats/yamred_dsv_writer.h
+++ b/yt/yt/library/formats/yamred_dsv_writer.h
@@ -1,7 +1,8 @@
#pragma once
-#include "public.h"
-#include "config.h"
+#include <yt/yt/client/formats/public.h>
+#include <yt/yt/client/formats/config.h>
+
#include "helpers.h"
#include "yamr_writer_base.h"
diff --git a/yt/yt/client/formats/yql_yson_converter.cpp b/yt/yt/library/formats/yql_yson_converter.cpp
index dc57a1a69a..dc57a1a69a 100644
--- a/yt/yt/client/formats/yql_yson_converter.cpp
+++ b/yt/yt/library/formats/yql_yson_converter.cpp
diff --git a/yt/yt/client/formats/yql_yson_converter.h b/yt/yt/library/formats/yql_yson_converter.h
index 5caafea963..1e5fa80e7f 100644
--- a/yt/yt/client/formats/yql_yson_converter.h
+++ b/yt/yt/library/formats/yql_yson_converter.h
@@ -1,4 +1,4 @@
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/yson_map_to_unversioned_value.cpp b/yt/yt/library/formats/yson_map_to_unversioned_value.cpp
index fced3477f5..fced3477f5 100644
--- a/yt/yt/client/formats/yson_map_to_unversioned_value.cpp
+++ b/yt/yt/library/formats/yson_map_to_unversioned_value.cpp
diff --git a/yt/yt/client/formats/yson_map_to_unversioned_value.h b/yt/yt/library/formats/yson_map_to_unversioned_value.h
index 1e53ad6c88..023a0600a3 100644
--- a/yt/yt/client/formats/yson_map_to_unversioned_value.h
+++ b/yt/yt/library/formats/yson_map_to_unversioned_value.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/client/table_client/public.h>
#include <yt/yt/client/table_client/table_consumer.h>
diff --git a/yt/yt/client/formats/yson_parser.cpp b/yt/yt/library/formats/yson_parser.cpp
index 193b7caf31..d9bb6b303e 100644
--- a/yt/yt/client/formats/yson_parser.cpp
+++ b/yt/yt/library/formats/yson_parser.cpp
@@ -1,5 +1,6 @@
#include "yson_parser.h"
-#include "parser.h"
+
+#include <yt/yt/client/formats/parser.h>
#include <yt/yt/client/table_client/public.h>
diff --git a/yt/yt/client/formats/yson_parser.h b/yt/yt/library/formats/yson_parser.h
index a6e4880b30..5a5f9752da 100644
--- a/yt/yt/client/formats/yson_parser.h
+++ b/yt/yt/library/formats/yson_parser.h
@@ -1,6 +1,6 @@
#pragma once
-#include "public.h"
+#include <yt/yt/client/formats/public.h>
#include <yt/yt/core/yson/public.h>
diff --git a/yt/yt/client/unittests/logical_type_shortcuts.h b/yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h
index 7b22006a06..7b22006a06 100644
--- a/yt/yt/client/unittests/logical_type_shortcuts.h
+++ b/yt/yt/library/logical_type_shortcuts/logical_type_shortcuts.h