summaryrefslogtreecommitdiffstats
path: root/yt/cpp/mapreduce/interface/ut
diff options
context:
space:
mode:
authornadya73 <[email protected]>2024-07-02 23:10:50 +0300
committernadya73 <[email protected]>2024-07-02 23:21:03 +0300
commit5ea9afc5ee7edc24efa5f45b3a15e184872b0854 (patch)
tree4ccc339d97575cba8b3ed47b6f0615326bdb5324 /yt/cpp/mapreduce/interface/ut
parent96b239778766d32d5158aca805e08199b3c0a743 (diff)
[yt/cpp/mapreduce] YT-21595: Use gtest instead of ytest in all mapreduce tests
85671f0cf4f45b4f015fa2cc0d195b81c16c6e8a
Diffstat (limited to 'yt/cpp/mapreduce/interface/ut')
-rw-r--r--yt/cpp/mapreduce/interface/ut/common_ut.cpp353
-rw-r--r--yt/cpp/mapreduce/interface/ut/common_ut.h1
-rw-r--r--yt/cpp/mapreduce/interface/ut/config_ut.cpp17
-rw-r--r--yt/cpp/mapreduce/interface/ut/error_ut.cpp81
-rw-r--r--yt/cpp/mapreduce/interface/ut/format_ut.cpp232
-rw-r--r--yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp100
-rw-r--r--yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp254
-rw-r--r--yt/cpp/mapreduce/interface/ut/operation_ut.cpp272
-rw-r--r--yt/cpp/mapreduce/interface/ut/proto3_ut.proto17
-rw-r--r--yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp270
-rw-r--r--yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto142
-rw-r--r--yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp444
-rw-r--r--yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto402
-rw-r--r--yt/cpp/mapreduce/interface/ut/serialize_ut.cpp46
-rw-r--r--yt/cpp/mapreduce/interface/ut/ya.make5
15 files changed, 2634 insertions, 2 deletions
diff --git a/yt/cpp/mapreduce/interface/ut/common_ut.cpp b/yt/cpp/mapreduce/interface/ut/common_ut.cpp
new file mode 100644
index 00000000000..85122a97ec6
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/common_ut.cpp
@@ -0,0 +1,353 @@
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <util/generic/xrange.h>
+
+using namespace NYT;
+
+// Serializes `obj` through the global ::Save and returns the bytes that were written.
+template <class T>
+TString SaveToString(const T& obj)
+{
+    TString serialized;
+    TStringOutput sink(serialized);
+    ::Save(&sink, obj);
+    return serialized;
+}
+
+// Default-constructs a T and fills it from the bytes in `s` via the global ::Load.
+template <class T>
+T LoadFromString(TStringBuf s)
+{
+    T restored;
+    TMemoryInput source(s);
+    ::Load(&source, restored);
+    return restored;
+}
+
+// Round-trips `obj` through SaveToString / LoadFromString and returns the restored copy.
+template <class T>
+T SaveLoad(const T& obj)
+{
+    const TString serialized = SaveToString(obj);
+    return LoadFromString<T>(serialized);
+}
+
+// Checks the variadic constructor and Add() of TSortColumns, and that calling Add()
+// on a copy leaves the source object unchanged.
+TEST(TCommonTest, SortColumnsLegacy)
+{
+    TSortColumns base("a", "b");
+    EXPECT_TRUE((base.Parts_ == TSortColumns{"a", "b"}));
+
+    base.Add("c", "d");
+    EXPECT_TRUE((base.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+
+    // Add() is applied to a temporary copy, so `base` must keep its four columns.
+    auto extendedByPair = TSortColumns(base).Add("e", "f");
+    EXPECT_TRUE((base.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+    EXPECT_TRUE((extendedByPair.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f"}));
+
+    // Same check for a chain of single-column Add() calls.
+    auto extendedByChain = TSortColumns(base).Add("e").Add("f").Add("g");
+    EXPECT_TRUE((base.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+    EXPECT_TRUE((extendedByChain.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f", "g"}));
+}
+
+// Exercises TSortColumn for both sort orders: accessors, comparisons, conversions to a
+// plain string, YSON representation, Save/Load round trip and ToString output.
+TEST(TCommonTest, SortColumn)
+{
+    auto ascending = TSortColumn("a");
+    EXPECT_EQ(ascending.Name(), "a");
+    EXPECT_EQ(ascending.SortOrder(), ESortOrder::SO_ASCENDING);
+    EXPECT_EQ(ascending, TSortColumn("a", ESortOrder::SO_ASCENDING));
+    EXPECT_NE(ascending, TSortColumn("a", ESortOrder::SO_DESCENDING));
+
+    // An ascending column is expected to behave like a plain column name.
+    EXPECT_NO_THROW(ascending.EnsureAscending());
+    EXPECT_EQ(static_cast<TString>(ascending), "a");
+    EXPECT_EQ(ascending, "a");
+
+    auto another = ascending;
+    EXPECT_NO_THROW(another = "another");
+    EXPECT_EQ(another.Name(), "another");
+    EXPECT_EQ(another.SortOrder(), ESortOrder::SO_ASCENDING);
+    EXPECT_EQ(another, TSortColumn("another", ESortOrder::SO_ASCENDING));
+    EXPECT_NE(another, TSortColumn("another", ESortOrder::SO_DESCENDING));
+
+    auto ascendingNode = BuildYsonNodeFluently().Value(ascending);
+    EXPECT_EQ(ascendingNode, TNode("a"));
+
+    EXPECT_EQ(SaveLoad(ascending), ascending);
+    // The serialized column must differ from a serialized bare column name.
+    EXPECT_NE(SaveToString(ascending), SaveToString(TString("a")));
+
+    auto descending = TSortColumn("a", ESortOrder::SO_DESCENDING);
+    EXPECT_EQ(descending.Name(), "a");
+    EXPECT_EQ(descending.SortOrder(), ESortOrder::SO_DESCENDING);
+    EXPECT_EQ(descending, TSortColumn("a", ESortOrder::SO_DESCENDING));
+    EXPECT_NE(descending, TSortColumn("a", ESortOrder::SO_ASCENDING));
+
+    // Every string-like operation is expected to throw for a descending column.
+    EXPECT_THROW(descending.EnsureAscending(), yexception);
+    EXPECT_THROW(Y_UNUSED(static_cast<TString>(descending)), yexception);
+    EXPECT_THROW(Y_UNUSED(descending == "a"), yexception);
+    EXPECT_THROW(descending = "a", yexception);
+
+    auto descendingNode = BuildYsonNodeFluently().Value(descending);
+    EXPECT_EQ(descendingNode, TNode()("name", "a")("sort_order", "descending"));
+
+    EXPECT_EQ(SaveLoad(descending), descending);
+    // Compare against a serialized TString, mirroring the ascending check; passing a bare
+    // literal would make SaveToString deduce T as a char array instead of a string.
+    EXPECT_NE(SaveToString(descending), SaveToString(TString("a")));
+
+    EXPECT_EQ(ToString(TSortColumn("blah")), "blah");
+    EXPECT_EQ(ToString(TSortColumn("blah", ESortOrder::SO_DESCENDING)), "{\"name\"=\"blah\";\"sort_order\"=\"descending\"}");
+}
+
+// Checks TSortColumns with all-ascending columns and with one descending column mixed in.
+TEST(TCommonTest, SortColumns)
+{
+    TSortColumns allAscending("a", "b");
+    EXPECT_TRUE(allAscending.Parts_ == (TSortColumns{"a", "b"}));
+    EXPECT_NO_THROW(allAscending.EnsureAscending());
+    EXPECT_EQ(static_cast<TColumnNames>(allAscending).Parts_, (TVector<TString>{"a", "b"}));
+    EXPECT_EQ(allAscending.GetNames(), (TVector<TString>{"a", "b"}));
+
+    // Adding a descending column: names are still available, but the checks that
+    // require ascending order are expected to throw.
+    auto withDescending = allAscending;
+    withDescending.Add(TSortColumn("c", ESortOrder::SO_DESCENDING), "d");
+    EXPECT_TRUE((withDescending.Parts_ != TVector<TSortColumn>{"a", "b", "c", "d"}));
+    EXPECT_TRUE((withDescending.Parts_ == TVector<TSortColumn>{"a", "b", TSortColumn("c", ESortOrder::SO_DESCENDING), "d"}));
+    EXPECT_EQ(withDescending.GetNames(), (TVector<TString>{"a", "b", "c", "d"}));
+    EXPECT_THROW(withDescending.EnsureAscending(), yexception);
+    EXPECT_THROW(Y_UNUSED(static_cast<TColumnNames>(withDescending)), yexception);
+}
+
+// Checks TKeyBound construction (constructor and fluent setters), its accessors and
+// the YSON node it is serialized into.
+TEST(TCommonTest, KeyBound)
+{
+    auto bound = TKeyBound(ERelation::Greater, TKey(7, "a", TNode()("x", "y")));
+    EXPECT_EQ(bound.Relation(), ERelation::Greater);
+    EXPECT_EQ(bound.Key(), TKey(7, "a", TNode()("x", "y")));
+
+    // The same bound assembled through the fluent setters must serialize identically.
+    auto fluentBound = TKeyBound().Relation(ERelation::Greater).Key(TKey(7, "a", TNode()("x", "y")));
+    auto expected = TNode()
+        .Add(">")
+        .Add(TNode().Add(7).Add("a").Add(TNode()("x", "y")));
+
+    EXPECT_EQ(expected, BuildYsonNodeFluently().Value(bound));
+    EXPECT_EQ(expected, BuildYsonNodeFluently().Value(fluentBound));
+
+    // Mutate the existing bound in place and verify both accessors and serialization.
+    bound.Relation(ERelation::LessOrEqual);
+    bound.Key(TKey("A", 7));
+    EXPECT_EQ(bound.Relation(), ERelation::LessOrEqual);
+    EXPECT_EQ(bound.Key(), TKey("A", 7));
+
+    EXPECT_EQ(
+        BuildYsonNodeFluently().Value(bound),
+        TNode()
+            .Add("<=")
+            .Add(TNode().Add("A").Add(7)));
+}
+
+// Checks TTableSchema::SortBy: the requested columns become the leading ascending-sorted
+// columns, the remaining ones lose their sort order, and invalid requests throw.
+TEST(TCommonTest, TTableSchema)
+{
+    TTableSchema schema;
+    schema
+        .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING))
+        .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64))
+        .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64));
+    // Sorts a copy of the given schema by `columns`, verifies the result and returns it.
+    // The parameter is named differently from the outer `schema` to avoid confusion.
+    auto checkSortBy = [](TTableSchema sorted, const TVector<TString>& columns) {
+        auto initialSchema = sorted;
+        sorted.SortBy(columns);
+        // The sort columns come first, in the requested order, all ascending.
+        for (auto i: xrange(columns.size())) {
+            EXPECT_EQ(sorted.Columns()[i].Name(), columns[i]);
+            EXPECT_EQ(sorted.Columns()[i].SortOrder(), ESortOrder::SO_ASCENDING);
+        }
+        // Every remaining column must have no sort order.
+        for (auto i: xrange(columns.size(), static_cast<size_t>(initialSchema.Columns().size()))) {
+            EXPECT_EQ(sorted.Columns()[i].SortOrder(), Nothing());
+        }
+        EXPECT_EQ(initialSchema.Columns().size(), sorted.Columns().size());
+        return sorted;
+    };
+    auto newSchema = checkSortBy(schema, {"b"});
+    EXPECT_EQ(newSchema.Columns()[1].Name(), TString("a"));
+    EXPECT_EQ(newSchema.Columns()[2].Name(), TString("c"));
+    checkSortBy(schema, {"b", "c"});
+    checkSortBy(schema, {"c", "a"});
+    // Duplicate and unknown column names are expected to be rejected.
+    EXPECT_THROW(checkSortBy(schema, {"b", "b"}), yexception);
+    EXPECT_THROW(checkSortBy(schema, {"a", "junk"}), yexception);
+}
+
+// Checks how decimal columns (plain, optional and inside a list) are serialized to a
+// schema node, and that the node deserializes back into an equal schema.
+TEST(TCommonTest, TTableSchema_Decimal)
+{
+    NYT::TTableSchema tableSchema;
+
+    tableSchema.AddColumn("a", NTi::Decimal(35, 18));
+    tableSchema.AddColumn("b", NTi::Optional(NTi::Decimal(35, 18)));
+    tableSchema.AddColumn("c", NTi::List(NTi::Decimal(35, 18)));
+
+    auto tableSchemaNode = tableSchema.ToNode();
+    const auto& tableSchemaNodeList = tableSchemaNode.AsList();
+
+    // Fatal check: the entries are indexed below, so stop here if any of them is missing.
+    ASSERT_EQ(tableSchemaNodeList.size(), 3u);
+
+    // There was a bug in the serialization of decimal type: https://github.com/ytsaurus/ytsaurus/issues/173
+    {
+        const auto& currentType = tableSchemaNodeList[0];
+        EXPECT_EQ(currentType.ChildAsString("type"), "string");
+        EXPECT_TRUE(currentType.ChildAsBool("required"));
+        EXPECT_TRUE(currentType.HasKey("type_v3"));
+        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "decimal");
+    }
+    {
+        const auto& currentType = tableSchemaNodeList[1];
+        EXPECT_EQ(currentType.ChildAsString("type"), "string");
+        EXPECT_FALSE(currentType.ChildAsBool("required"));
+        EXPECT_TRUE(currentType.HasKey("type_v3"));
+        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "optional");
+        EXPECT_EQ(currentType.At("type_v3").At("item").ChildAsString("type_name"), "decimal");
+    }
+    {
+        const auto& currentType = tableSchemaNodeList[2];
+        EXPECT_EQ(currentType.ChildAsString("type"), "any");
+        EXPECT_TRUE(currentType.ChildAsBool("required"));
+        EXPECT_TRUE(currentType.HasKey("type_v3"));
+        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "list");
+        EXPECT_EQ(currentType.At("type_v3").At("item").ChildAsString("type_name"), "decimal");
+    }
+
+    EXPECT_EQ(tableSchema, TTableSchema::FromNode(tableSchemaNode));
+}
+
+// Checks which legacy value type and `required` flag a column reports after its type
+// has been set from a type_v3 description.
+TEST(TCommonTest, TColumnSchema_TypeV3)
+{
+    {
+        auto interval = TColumnSchema().Type(NTi::Interval());
+        EXPECT_EQ(interval.Required(), true);
+        EXPECT_EQ(interval.Type(), VT_INTERVAL);
+    }
+    {
+        auto optionalDate = TColumnSchema().Type(NTi::Optional(NTi::Date()));
+        EXPECT_EQ(optionalDate.Required(), false);
+        EXPECT_EQ(optionalDate.Type(), VT_DATE);
+    }
+    {
+        auto interval64 = TColumnSchema().Type(NTi::Interval64());
+        EXPECT_EQ(interval64.Required(), true);
+        EXPECT_EQ(interval64.Type(), VT_INTERVAL64);
+    }
+    {
+        auto optionalDate32 = TColumnSchema().Type(NTi::Optional(NTi::Date32()));
+        EXPECT_EQ(optionalDate32.Required(), false);
+        EXPECT_EQ(optionalDate32.Type(), VT_DATE32);
+    }
+    {
+        // A bare null type is expected to be reported as non-required.
+        auto nullColumn = TColumnSchema().Type(NTi::Null());
+        EXPECT_EQ(nullColumn.Required(), false);
+        EXPECT_EQ(nullColumn.Type(), VT_NULL);
+    }
+    {
+        // Optional<Null> is expected to map to the `any` legacy type.
+        auto optionalNull = TColumnSchema().Type(NTi::Optional(NTi::Null()));
+        EXPECT_EQ(optionalNull.Required(), false);
+        EXPECT_EQ(optionalNull.Type(), VT_ANY);
+    }
+    {
+        // Decimal is expected to map to the `string` legacy type.
+        auto decimal = TColumnSchema().Type(NTi::Decimal(35, 18));
+        EXPECT_EQ(decimal.Required(), true);
+        EXPECT_EQ(decimal.Type(), VT_STRING);
+    }
+}
+
+// Checks ToTypeV3 for a required and a non-required legacy value type.
+TEST(TCommonTest, ToTypeV3)
+{
+    const auto requiredInt32 = ToTypeV3(VT_INT32, true);
+    EXPECT_EQ(*requiredInt32, *NTi::Int32());
+
+    const auto optionalUtf8 = ToTypeV3(VT_UTF8, false);
+    EXPECT_EQ(*optionalUtf8, *NTi::Optional(NTi::Utf8()));
+}
+
+// Checks that a column schema deserialized from YSON reports the expected name and type_v3.
+TEST(TCommonTest, DeserializeColumn)
+{
+    // Parses YSON text and deserializes it into a fresh TColumnSchema.
+    auto parseColumn = [] (TStringBuf yson) {
+        TColumnSchema result;
+        auto node = NodeFromYsonString(yson);
+        Deserialize(result, node);
+        return result;
+    };
+
+    // Only the legacy type and the `required` flag are given.
+    auto legacyOnly = parseColumn("{name=foo; type=int64; required=%false}");
+    EXPECT_EQ(legacyOnly.Name(), "foo");
+    EXPECT_EQ(*legacyOnly.TypeV3(), *NTi::Optional(NTi::Int64()));
+
+    // Both the legacy type and type_v3 are given.
+    auto withTypeV3 = parseColumn("{name=bar; type=utf8; required=%true; type_v3=utf8}");
+    EXPECT_EQ(withTypeV3.Name(), "bar");
+    EXPECT_EQ(*withTypeV3.TypeV3(), *NTi::Utf8());
+}
+
+// Checks that changing any single attribute of a column schema makes it compare
+// (and serialize) differently from the original.
+TEST(TCommonTest, ColumnSchemaEquality)
+{
+    auto base = TColumnSchema()
+        .Name("col")
+        .TypeV3(NTi::Optional(NTi::List(NTi::String())))
+        .SortOrder(ESortOrder::SO_ASCENDING)
+        .Lock("lock")
+        .Expression("x + 12")
+        .Aggregate("sum")
+        .Group("group");
+
+    // Each scope starts from a fresh copy of `base` and alters exactly one attribute.
+    {
+        auto renamed = base;
+        ASSERT_SERIALIZABLES_EQ(renamed, base);
+        renamed.Name("other");
+        ASSERT_SERIALIZABLES_NE(renamed, base);
+    }
+    {
+        auto retyped = base;
+        retyped.TypeV3(NTi::List(NTi::String()));
+        ASSERT_SERIALIZABLES_NE(retyped, base);
+    }
+    {
+        auto unsorted = base;
+        unsorted.ResetSortOrder();
+        ASSERT_SERIALIZABLES_NE(unsorted, base);
+    }
+    {
+        auto relocked = base;
+        relocked.Lock("lock1");
+        ASSERT_SERIALIZABLES_NE(relocked, base);
+    }
+    {
+        auto otherExpression = base;
+        otherExpression.Expression("x + 13");
+        ASSERT_SERIALIZABLES_NE(otherExpression, base);
+    }
+    {
+        auto withoutAggregate = base;
+        withoutAggregate.ResetAggregate();
+        ASSERT_SERIALIZABLES_NE(withoutAggregate, base);
+    }
+    {
+        auto regrouped = base;
+        regrouped.Group("group1");
+        ASSERT_SERIALIZABLES_NE(regrouped, base);
+    }
+}
+
+// Checks that changing strictness, a column type, the column list or the unique-keys
+// flag makes a table schema compare (and serialize) differently from the original.
+TEST(TCommonTest, TableSchemaEquality)
+{
+    auto col1 = TColumnSchema()
+        .Name("col1")
+        .TypeV3(NTi::Optional(NTi::List(NTi::String())))
+        .SortOrder(ESortOrder::SO_ASCENDING);
+
+    auto col2 = TColumnSchema()
+        .Name("col2")
+        .TypeV3(NTi::Uint32());
+
+    auto schema = TTableSchema()
+        .AddColumn(col1)
+        .AddColumn(col2)
+        .Strict(true)
+        .UniqueKeys(true);
+
+    // Each scope starts from a fresh copy of `schema` and alters exactly one property.
+    {
+        auto nonStrict = schema;
+        ASSERT_SERIALIZABLES_EQ(nonStrict, schema);
+
+        nonStrict.Strict(false);
+        ASSERT_SERIALIZABLES_NE(nonStrict, schema);
+    }
+    {
+        auto retypedColumn = schema;
+        retypedColumn.MutableColumns()[0].TypeV3(NTi::List(NTi::String()));
+        ASSERT_SERIALIZABLES_NE(retypedColumn, schema);
+    }
+    {
+        auto extraColumn = schema;
+        extraColumn.MutableColumns().push_back(col1);
+        ASSERT_SERIALIZABLES_NE(extraColumn, schema);
+    }
+    {
+        auto nonUnique = schema;
+        nonUnique.UniqueKeys(false);
+        ASSERT_SERIALIZABLES_NE(nonUnique, schema);
+    }
+}
diff --git a/yt/cpp/mapreduce/interface/ut/common_ut.h b/yt/cpp/mapreduce/interface/ut/common_ut.h
new file mode 100644
index 00000000000..6f70f09beec
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/common_ut.h
@@ -0,0 +1 @@
+#pragma once
diff --git a/yt/cpp/mapreduce/interface/ut/config_ut.cpp b/yt/cpp/mapreduce/interface/ut/config_ut.cpp
new file mode 100644
index 00000000000..780a57f3f25
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/config_ut.cpp
@@ -0,0 +1,17 @@
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+using namespace NYT;
+
+// Checks that TConfig::Reset() leaves `Hosts` equal to the value captured at the start
+// of the test, both without and with an intermediate modification.
+TEST(TConfigTest, Reset) {
+    // Very limited test, checks only one config field.
+
+    auto snapshot = *TConfig::Get();
+    TConfig::Get()->Reset();
+    EXPECT_EQ(snapshot.Hosts, TConfig::Get()->Hosts);
+
+    // Overwrite the field, then make sure Reset() discards the modification.
+    TConfig::Get()->Hosts = "hosts/fb867";
+    TConfig::Get()->Reset();
+    EXPECT_EQ(snapshot.Hosts, TConfig::Get()->Hosts);
+}
diff --git a/yt/cpp/mapreduce/interface/ut/error_ut.cpp b/yt/cpp/mapreduce/interface/ut/error_ut.cpp
new file mode 100644
index 00000000000..4911f29d97a
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/error_ut.cpp
@@ -0,0 +1,81 @@
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <library/cpp/json/json_reader.h>
+
+#include <yt/cpp/mapreduce/interface/errors.h>
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+#include <util/generic/set.h>
+
+using namespace NYT;
+
+// Stream output for TNode: a "TNode:" prefix followed by the node rendered as YSON text.
+template<>
+void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node)
+{
+    s << "TNode:";
+    s << NodeToYsonString(node);
+}
+
+// Checks that TYtError built from a JSON error document exposes the code, message,
+// attributes and inner errors of that document.
+TEST(TErrorTest, ParseJson)
+{
+    // Scary real world error! Boo!
+    const char* jsonText =
+        R"""({)"""
+            R"""("code":500,)"""
+            R"""("message":"Error resolving path //home/user/link",)"""
+            R"""("attributes":{)"""
+                R"""("fid":18446484571700269066,)"""
+                R"""("method":"Create",)"""
+                R"""("tid":17558639495721339338,)"""
+                R"""("datetime":"2017-04-07T13:38:56.474819Z",)"""
+                R"""("pid":414529,)"""
+                R"""("host":"build01-01g.yt.yandex.net"},)"""
+            R"""("inner_errors":[{)"""
+                R"""("code":1,)"""
+                R"""("message":"Node //tt cannot have children",)"""
+                R"""("attributes":{)"""
+                    R"""("fid":18446484571700269066,)"""
+                    R"""("tid":17558639495721339338,)"""
+                    R"""("datetime":"2017-04-07T13:38:56.474725Z",)"""
+                    R"""("pid":414529,)"""
+                    R"""("host":"build01-01g.yt.yandex.net"},)"""
+                R"""("inner_errors":[]}]})""";
+
+    NJson::TJsonValue jsonValue;
+    ReadJsonFastTree(jsonText, &jsonValue, /*throwOnError=*/ true);
+
+    TYtError error(jsonValue);
+    EXPECT_EQ(error.GetCode(), 500);
+    EXPECT_EQ(error.GetMessage(), R"""(Error resolving path //home/user/link)""");
+    // Fatal check: the first inner error is indexed right below.
+    ASSERT_EQ(error.InnerErrors().size(), 1u);
+    EXPECT_EQ(error.InnerErrors()[0].GetCode(), 1);
+
+    EXPECT_EQ(error.HasAttributes(), true);
+    EXPECT_EQ(error.GetAttributes().at("method"), TNode("Create"));
+
+    EXPECT_EQ(error.GetAllErrorCodes(), TSet<int>({500, 1}));
+}
+
+// Checks the YSON text produced for an error parsed from JSON, and that an error
+// rebuilt from that YSON produces the same text again.
+TEST(TErrorTest, GetYsonText) {
+    const char* jsonText =
+        R"""({)"""
+            R"""("code":500,)"""
+            R"""("message":"outer error",)"""
+            R"""("attributes":{)"""
+                R"""("method":"Create",)"""
+                R"""("pid":414529},)"""
+            R"""("inner_errors":[{)"""
+                R"""("code":1,)"""
+                R"""("message":"inner error",)"""
+                R"""("attributes":{},)"""
+                R"""("inner_errors":[])"""
+            R"""(}]})""";
+    TYtError parsed;
+    parsed.ParseFrom(jsonText);
+    TString yson = parsed.GetYsonText();
+    TYtError rebuilt(NodeFromYsonString(yson));
+    EXPECT_EQ(
+        yson,
+        R"""({"code"=500;"message"="outer error";"attributes"={"method"="Create";"pid"=414529};"inner_errors"=[{"code"=1;"message"="inner error"}]})""");
+    EXPECT_EQ(rebuilt.GetYsonText(), yson);
+}
diff --git a/yt/cpp/mapreduce/interface/ut/format_ut.cpp b/yt/cpp/mapreduce/interface/ut/format_ut.cpp
new file mode 100644
index 00000000000..83b860ab94d
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/format_ut.cpp
@@ -0,0 +1,232 @@
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/format.h>
+
+#include <yt/cpp/mapreduce/interface/ut/proto3_ut.pb.h>
+#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+using namespace NYT;
+
+// Returns a copy of the "columns" entry of the table at `tableIndex`, taken from the
+// "tables" attribute of the format config.
+static TNode GetColumns(const TFormat& format, int tableIndex = 0)
+{
+    const auto& tables = format.Config.GetAttributes()["tables"];
+    return tables[tableIndex]["columns"];
+}
+
+// Checks the name, proto type and field number of every column in the protobuf format
+// config generated for NUnitTesting::TIntegral.
+TEST(TProtobufFormatTest, TIntegral)
+{
+    const auto format = TFormat::Protobuf<NUnitTesting::TIntegral>();
+    auto columns = GetColumns(format);
+
+    struct TColumn
+    {
+        TString Name;
+        TString ProtoType;
+        int FieldNumber;
+    };
+
+    auto expected = TVector<TColumn>{
+        {"DoubleField", "double", 1},
+        {"FloatField", "float", 2},
+        {"Int32Field", "int32", 3},
+        {"Int64Field", "int64", 4},
+        {"Uint32Field", "uint32", 5},
+        {"Uint64Field", "uint64", 6},
+        {"Sint32Field", "sint32", 7},
+        {"Sint64Field", "sint64", 8},
+        {"Fixed32Field", "fixed32", 9},
+        {"Fixed64Field", "fixed64", 10},
+        {"Sfixed32Field", "sfixed32", 11},
+        {"Sfixed64Field", "sfixed64", 12},
+        {"BoolField", "bool", 13},
+        {"EnumField", "enum_string", 14},
+    };
+
+    // Fatal check: both containers are indexed in lockstep below, so a size mismatch
+    // must stop the test instead of reading past the end of one of them.
+    ASSERT_EQ(columns.Size(), expected.size());
+    for (size_t i = 0; i < expected.size(); ++i) {
+        EXPECT_EQ(columns[i]["name"], expected[i].Name);
+        EXPECT_EQ(columns[i]["proto_type"], expected[i].ProtoType);
+        EXPECT_EQ(columns[i]["field_number"], expected[i].FieldNumber);
+    }
+}
+
+// Checks the format config for NUnitTesting::TRowFieldSerializationOption: the first
+// column is a structured message with nested fields, the second is a plain message.
+TEST(TProtobufFormatTest, TRowFieldSerializationOption)
+{
+    const auto format = TFormat::Protobuf<NUnitTesting::TRowFieldSerializationOption>();
+    auto columns = GetColumns(format);
+
+    auto& structuredRow = columns[0];
+    EXPECT_EQ(structuredRow["name"], "UrlRow_1");
+    EXPECT_EQ(structuredRow["proto_type"], "structured_message");
+    EXPECT_EQ(structuredRow["field_number"], 1);
+
+    const auto& nestedFields = structuredRow["fields"];
+    const auto& host = nestedFields[0];
+    EXPECT_EQ(host["name"], "Host");
+    EXPECT_EQ(host["proto_type"], "string");
+    EXPECT_EQ(host["field_number"], 1);
+
+    const auto& path = nestedFields[1];
+    EXPECT_EQ(path["name"], "Path");
+    EXPECT_EQ(path["proto_type"], "string");
+    EXPECT_EQ(path["field_number"], 2);
+
+    const auto& httpCode = nestedFields[2];
+    EXPECT_EQ(httpCode["name"], "HttpCode");
+    EXPECT_EQ(httpCode["proto_type"], "sint32");
+    EXPECT_EQ(httpCode["field_number"], 3);
+
+    auto& serializedRow = columns[1];
+    EXPECT_EQ(serializedRow["name"], "UrlRow_2");
+    EXPECT_EQ(serializedRow["proto_type"], "message");
+    EXPECT_EQ(serializedRow["field_number"], 2);
+}
+
+
+// Checks that the first column of the format config for NUnitTesting::TPacked is
+// described as a packed repeated int64 field.
+TEST(TProtobufFormatTest, TPacked)
+{
+    const auto format = TFormat::Protobuf<NUnitTesting::TPacked>();
+    auto columns = GetColumns(format);
+    auto packedColumn = columns[0];
+
+    EXPECT_EQ(packedColumn["name"], "PackedListInt64");
+    EXPECT_EQ(packedColumn["proto_type"], "int64");
+    EXPECT_EQ(packedColumn["field_number"], 1);
+    EXPECT_EQ(packedColumn["packed"], true);
+    EXPECT_EQ(packedColumn["repeated"], true);
+}
+
+// Checks that building a protobuf format throws TApiUsageError for TCyclic and its
+// nested TA..TD messages, while TCyclic::TE yields a config with a "message" column.
+TEST(TProtobufFormatTest, TCyclic)
+{
+    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic>(), TApiUsageError);
+    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TA>(), TApiUsageError);
+    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TB>(), TApiUsageError);
+    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TC>(), TApiUsageError);
+    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TD>(), TApiUsageError);
+
+    const auto acceptedFormat = TFormat::Protobuf<NUnitTesting::TCyclic::TE>();
+    auto columns = GetColumns(acceptedFormat);
+    auto firstColumn = columns[0];
+    EXPECT_EQ(firstColumn["name"], "d");
+    EXPECT_EQ(firstColumn["proto_type"], "message");
+    EXPECT_EQ(firstColumn["field_number"], 1);
+}
+
+// Checks how the map fields of NUnitTesting::TWithMap are described in the format
+// config: each is a structured message with an int64 key field and a value field whose
+// proto type differs from column to column.
+TEST(TProtobufFormatTest, Map)
+{
+    const auto format = TFormat::Protobuf<NUnitTesting::TWithMap>();
+    auto columns = GetColumns(format);
+
+    // Fatal check: columns [0..4] are indexed below.
+    ASSERT_EQ(columns.Size(), 5u);
+    {
+        const auto& column = columns[0];
+        EXPECT_EQ(column["name"], "MapDefault");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        // Fatal check: both fields are indexed right below.
+        ASSERT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "message");
+    }
+    {
+        const auto& column = columns[1];
+        EXPECT_EQ(column["name"], "MapListOfStructsLegacy");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        ASSERT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "message");
+    }
+    {
+        const auto& column = columns[2];
+        EXPECT_EQ(column["name"], "MapListOfStructs");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        ASSERT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message");
+    }
+    {
+        const auto& column = columns[3];
+        EXPECT_EQ(column["name"], "MapOptionalDict");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        ASSERT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message");
+    }
+    {
+        const auto& column = columns[4];
+        EXPECT_EQ(column["name"], "MapDict");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        ASSERT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message");
+    }
+}
+
+
+// Checks how oneof groups of NUnitTesting::TWithOneof are described in the format
+// config for the different column variants.
+TEST(TProtobufFormatTest, Oneof)
+{
+    const auto format = TFormat::Protobuf<NUnitTesting::TWithOneof>();
+    auto columns = GetColumns(format);
+
+    // Fatal check: columns [0..3] are indexed below.
+    ASSERT_EQ(columns.Size(), 4u);
+    // Verifies a column whose second field is a oneof group named `oneof2Name`, with a
+    // nested oneof inside its "z2" member and plain x1/y1/z1 fields after it.
+    auto check = [] (const TNode& column, TStringBuf name, TStringBuf oneof2Name) {
+        EXPECT_EQ(column["name"], name);
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        // Fatal inside the lambda only: skips the remaining checks for this column,
+        // because fields [0..4] are indexed below.
+        ASSERT_EQ(column["fields"].Size(), 5u);
+        EXPECT_EQ(column["fields"][0]["name"], "field");
+
+        const auto& oneof2 = column["fields"][1];
+        EXPECT_EQ(oneof2["name"], oneof2Name);
+        EXPECT_EQ(oneof2["proto_type"], "oneof");
+        EXPECT_EQ(oneof2["fields"][0]["name"], "y2");
+        EXPECT_EQ(oneof2["fields"][1]["name"], "z2");
+        EXPECT_EQ(oneof2["fields"][1]["proto_type"], "structured_message");
+        const auto& embeddedOneof = oneof2["fields"][1]["fields"][0];
+        EXPECT_EQ(embeddedOneof["name"], "Oneof");
+        EXPECT_EQ(embeddedOneof["fields"][0]["name"], "x");
+        EXPECT_EQ(embeddedOneof["fields"][1]["name"], "y");
+        EXPECT_EQ(oneof2["fields"][2]["name"], "x2");
+
+        EXPECT_EQ(column["fields"][2]["name"], "x1");
+        EXPECT_EQ(column["fields"][3]["name"], "y1");
+        EXPECT_EQ(column["fields"][4]["name"], "z1");
+    };
+
+    check(columns[0], "DefaultSeparateFields", "variant_field_name");
+    check(columns[1], "NoDefault", "Oneof2");
+
+    {
+        const auto& column = columns[2];
+        EXPECT_EQ(column["name"], "SerializationProtobuf");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        // Fatal check: fields [0..2] are indexed right below.
+        ASSERT_EQ(column["fields"].Size(), 3u);
+        EXPECT_EQ(column["fields"][0]["name"], "x1");
+        EXPECT_EQ(column["fields"][1]["name"], "y1");
+        EXPECT_EQ(column["fields"][2]["name"], "z1");
+    }
+    {
+        const auto& column = columns[3];
+        EXPECT_EQ(column["name"], "TopLevelOneof");
+        EXPECT_EQ(column["proto_type"], "oneof");
+        // Fatal check: the single field is indexed right below.
+        ASSERT_EQ(column["fields"].Size(), 1u);
+        EXPECT_EQ(column["fields"][0]["name"], "MemberOfTopLevelOneof");
+    }
+}
+
+// Checks the first column of the format config for NTestingProto3::TWithOptional.
+TEST(TProto3Test, TWithOptional)
+{
+    const auto format = TFormat::Protobuf<NTestingProto3::TWithOptional>();
+    auto columns = GetColumns(format);
+
+    auto& optionalColumn = columns[0];
+    EXPECT_EQ(optionalColumn["name"], "x");
+    EXPECT_EQ(optionalColumn["proto_type"], "int64");
+    EXPECT_EQ(optionalColumn["field_number"], 1);
+}
+
+// Checks the format config for NTestingProto3::TWithOptionalMessage: the first column
+// is a structured message containing a single int64 field.
+TEST(TProto3Test, TWithOptionalMessage)
+{
+    const auto format = TFormat::Protobuf<NTestingProto3::TWithOptionalMessage>();
+    auto columns = GetColumns(format);
+
+    EXPECT_EQ(columns[0]["name"], "x");
+    EXPECT_EQ(columns[0]["proto_type"], "structured_message");
+    EXPECT_EQ(columns[0]["field_number"], 1);
+
+    // Fatal check: the nested field is indexed right below.
+    ASSERT_EQ(columns[0]["fields"].Size(), 1u);
+    EXPECT_EQ(columns[0]["fields"][0]["name"], "x");
+    EXPECT_EQ(columns[0]["fields"][0]["proto_type"], "int64");
+    EXPECT_EQ(columns[0]["fields"][0]["field_number"], 1);
+}
diff --git a/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp b/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp
new file mode 100644
index 00000000000..9972637affe
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp
@@ -0,0 +1,100 @@
+#include <yt/cpp/mapreduce/interface/job_counters.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+using namespace NYT;
+
+// Parses a fully populated job-counters node and checks every total as well as the
+// individual values of the nested counters.
+TEST(TJobCountersTest, Full)
+{
+    const TString yson = R"""(
+ {
+ "completed" = {
+ "total" = 6;
+ "non-interrupted" = 1;
+ "interrupted" = {
+ "whatever_interrupted" = 2;
+ "whatever_else_interrupted" = 3;
+ };
+ };
+ "aborted" = {
+ "non_scheduled" = {
+ "whatever_non_scheduled" = 4;
+ "whatever_else_non_scheduled" = 5;
+ };
+ "scheduled" = {
+ "whatever_scheduled" = 6;
+ "whatever_else_scheduled" = 7;
+ };
+ "total" = 22;
+ };
+ "lost" = 8;
+ "invalidated" = 9;
+ "failed" = 10;
+ "running" = 11;
+ "suspended" = 12;
+ "pending" = 13;
+ "blocked" = 14;
+ "total" = 105;
+ })""";
+
+    TJobCounters jobCounters(NodeFromYsonString(yson));
+
+    EXPECT_EQ(jobCounters.GetTotal(), 105u);
+
+    // Totals of nested groups are expected to equal the sum of their entries
+    // (2 + 3, 4 + 5, 6 + 7).
+    EXPECT_EQ(jobCounters.GetCompleted().GetTotal(), 6u);
+    EXPECT_EQ(jobCounters.GetCompletedNonInterrupted().GetTotal(), 1u);
+    EXPECT_EQ(jobCounters.GetCompletedInterrupted().GetTotal(), 5u);
+    EXPECT_EQ(jobCounters.GetAborted().GetTotal(), 22u);
+    EXPECT_EQ(jobCounters.GetAbortedNonScheduled().GetTotal(), 9u);
+    EXPECT_EQ(jobCounters.GetAbortedScheduled().GetTotal(), 13u);
+    EXPECT_EQ(jobCounters.GetLost().GetTotal(), 8u);
+    EXPECT_EQ(jobCounters.GetInvalidated().GetTotal(), 9u);
+    EXPECT_EQ(jobCounters.GetFailed().GetTotal(), 10u);
+    EXPECT_EQ(jobCounters.GetRunning().GetTotal(), 11u);
+    EXPECT_EQ(jobCounters.GetSuspended().GetTotal(), 12u);
+    EXPECT_EQ(jobCounters.GetPending().GetTotal(), 13u);
+    EXPECT_EQ(jobCounters.GetBlocked().GetTotal(), 14u);
+
+    EXPECT_EQ(jobCounters.GetCompletedInterrupted().GetValue("whatever_interrupted"), 2u);
+    EXPECT_EQ(jobCounters.GetCompletedInterrupted().GetValue("whatever_else_interrupted"), 3u);
+    EXPECT_EQ(jobCounters.GetAbortedNonScheduled().GetValue("whatever_non_scheduled"), 4u);
+    EXPECT_EQ(jobCounters.GetAbortedNonScheduled().GetValue("whatever_else_non_scheduled"), 5u);
+    EXPECT_EQ(jobCounters.GetAbortedScheduled().GetValue("whatever_scheduled"), 6u);
+    EXPECT_EQ(jobCounters.GetAbortedScheduled().GetValue("whatever_else_scheduled"), 7u);
+
+    // Asking for a key that is absent from the input is expected to throw.
+    EXPECT_THROW(jobCounters.GetCompletedInterrupted().GetValue("Nothingness"), yexception);
+}
+
+// Parses an empty map node and checks that every counter total is reported as zero.
+TEST(TJobCountersTest, Empty)
+{
+    const TString emptyMap = "{}";
+
+    TJobCounters jobCounters(NodeFromYsonString(emptyMap));
+
+    EXPECT_EQ(jobCounters.GetTotal(), 0u);
+
+    EXPECT_EQ(jobCounters.GetCompleted().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetCompletedNonInterrupted().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetCompletedInterrupted().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetAborted().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetAbortedNonScheduled().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetAbortedScheduled().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetLost().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetInvalidated().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetFailed().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetRunning().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetSuspended().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetPending().GetTotal(), 0u);
+    EXPECT_EQ(jobCounters.GetBlocked().GetTotal(), 0u);
+}
+
+// Checks that constructing TJobCounters from a non-map node (undefined, integer,
+// double, string) throws a yexception whose message mentions "TJobCounters".
+TEST(TJobCountersTest, Broken)
+{
+    const char* const expectedSubstring = "TJobCounters";
+
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode())), yexception, expectedSubstring);
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode(1))), yexception, expectedSubstring);
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode(1.0))), yexception, expectedSubstring);
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode("Whatever"))), yexception, expectedSubstring);
+}
diff --git a/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp b/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp
new file mode 100644
index 00000000000..90d40623c17
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp
@@ -0,0 +1,254 @@
+#include <yt/cpp/mapreduce/interface/job_statistics.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+using namespace NYT;
+
+// Parses a statistics node with one statistic and checks lookup, aggregation and
+// filtering by job state and job type.
+TEST(TJobStatisticsTest, Simple)
+{
+    const TString yson = R"""(
+ {
+ "data" = {
+ "output" = {
+ "0" = {
+ "uncompressed_data_size" = {
+ "$" = {
+ "completed" = {
+ "simple_sort" = {
+ "max" = 130;
+ "count" = 1;
+ "min" = 130;
+ "sum" = 130;
+ };
+ "map" = {
+ "max" = 42;
+ "count" = 1;
+ "min" = 42;
+ "sum" = 42;
+ };
+ };
+ "aborted" = {
+ "simple_sort" = {
+ "max" = 24;
+ "count" = 1;
+ "min" = 24;
+ "sum" = 24;
+ };
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+    TJobStatistics statistics(NodeFromYsonString(yson));
+
+    EXPECT_TRUE(statistics.HasStatistics("data/output/0/uncompressed_data_size"));
+    EXPECT_TRUE(!statistics.HasStatistics("nonexistent-statistics"));
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR(statistics.GetStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+    EXPECT_EQ(statistics.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"});
+
+    // Without explicit filters the expected values match the two "completed" entries
+    // (130 and 42); the "aborted" entry (24) is not included.
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2);
+
+    // Filtering by state selects the aborted entry; there is no aborted map job, so
+    // that combination is expected to produce an empty value.
+    EXPECT_EQ(statistics.JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), 24);
+    EXPECT_EQ(statistics.JobType({EJobType::Map}).JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), TMaybe<i64>());
+}
+
+// Checks that a statistic can be read as TDuration via GetStatisticsAs; the expected
+// value treats the stored number as milliseconds.
+TEST(TJobStatisticsTest, OtherTypes)
+{
+    const TString yson = R"""(
+ {
+ "time" = {
+ "exec" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = 2482468;
+ "count" = 38;
+ "min" = 578976;
+ "sum" = 47987270;
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+    TJobStatistics statistics(NodeFromYsonString(yson));
+
+    const auto execTime = statistics.GetStatisticsAs<TDuration>("time/exec");
+    EXPECT_EQ(execTime.Max(), TDuration::MilliSeconds(2482468));
+}
+
+// Checks the custom-statistics accessors: presence check, error on an unknown name,
+// name listing and value aggregation for entries stored under the "custom" subtree.
+TEST(TJobStatisticsTest, Custom)
+{
+    const TString yson = R"""(
+ {
+ "custom" = {
+ "some" = {
+ "path" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = -1;
+ "count" = 1;
+ "min" = -1;
+ "sum" = -1;
+ };
+ };
+ };
+ };
+ };
+ "another" = {
+ "path" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = 1001;
+ "count" = 2;
+ "min" = 1001;
+ "sum" = 2002;
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+    TJobStatistics statistics(NodeFromYsonString(yson));
+
+    EXPECT_TRUE(statistics.HasCustomStatistics("some/path"));
+    EXPECT_TRUE(!statistics.HasCustomStatistics("nonexistent-statistics"));
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR(statistics.GetCustomStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+    // The names are compared as sets, so their order does not matter.
+    const auto customNames = statistics.GetCustomStatisticsNames();
+    const THashSet<TString> expectedNames = {"some/path", "another/path"};
+    EXPECT_EQ(THashSet<TString>(customNames.begin(), customNames.end()), expectedNames);
+
+    EXPECT_EQ(statistics.GetCustomStatistics("some/path").Max(), -1);
+    EXPECT_EQ(statistics.GetCustomStatistics("another/path").Avg(), 1001);
+}
+
+// Checks statistics filtering when the input is keyed by task names ("partition_map",
+// "partition(0)"): filters by job state, job type and task name (string and enum forms).
+TEST(TJobStatisticsTest, TaskNames)
+{
+    const TString yson = R"""(
+ {
+ "data" = {
+ "output" = {
+ "0" = {
+ "uncompressed_data_size" = {
+ "$" = {
+ "completed" = {
+ "partition_map" = {
+ "max" = 130;
+ "count" = 1;
+ "min" = 130;
+ "sum" = 130;
+ };
+ "partition(0)" = {
+ "max" = 42;
+ "count" = 1;
+ "min" = 42;
+ "sum" = 42;
+ };
+ };
+ "aborted" = {
+ "simple_sort" = {
+ "max" = 24;
+ "count" = 1;
+ "min" = 24;
+ "sum" = 24;
+ };
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+    TJobStatistics statistics(NodeFromYsonString(yson));
+
+    EXPECT_TRUE(statistics.HasStatistics("data/output/0/uncompressed_data_size"));
+    EXPECT_TRUE(!statistics.HasStatistics("nonexistent-statistics"));
+    EXPECT_THROW_MESSAGE_HAS_SUBSTR(statistics.GetStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+    EXPECT_EQ(statistics.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"});
+
+    // Without explicit filters the expected values match the two "completed" entries.
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172);
+    EXPECT_EQ(statistics.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2);
+
+    // Filters by job state and job type.
+    EXPECT_EQ(
+        statistics
+            .JobState({EJobState::Aborted})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        24);
+    EXPECT_EQ(
+        statistics
+            .JobType({EJobType::Partition})
+            .JobState({EJobState::Aborted})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        TMaybe<i64>());
+
+    // Filters by task name given as a string.
+    EXPECT_EQ(
+        statistics
+            .TaskName({"partition(0)"})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        42);
+    EXPECT_EQ(
+        statistics
+            .TaskName({"partition"})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        TMaybe<i64>());
+    EXPECT_EQ(
+        statistics
+            .TaskName({"partition_map(0)"})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        130);
+
+    // Filters by job type.
+    EXPECT_EQ(
+        statistics
+            .JobType({EJobType::Partition})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        42);
+    EXPECT_EQ(
+        statistics
+            .JobType({EJobType::PartitionMap})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        130);
+
+    // Filters by task name given as an ETaskName value.
+    EXPECT_EQ(
+        statistics
+            .TaskName({ETaskName::Partition0})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        42);
+    EXPECT_EQ(
+        statistics
+            .TaskName({ETaskName::Partition1})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        TMaybe<i64>());
+    EXPECT_EQ(
+        statistics
+            .TaskName({ETaskName::PartitionMap0})
+            .GetStatistics("data/output/0/uncompressed_data_size")
+            .Sum(),
+        130);
+}
diff --git a/yt/cpp/mapreduce/interface/ut/operation_ut.cpp b/yt/cpp/mapreduce/interface/ut/operation_ut.cpp
new file mode 100644
index 00000000000..81d03d06186
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/operation_ut.cpp
@@ -0,0 +1,272 @@
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/job_statistics.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+using namespace NYT;
+using namespace NYT::NUnitTesting;
+
+////////////////////////////////////////////////////////////////////
+
+// Minimal IOperationPreparationContext stub for the preparer tests in this file:
+// it reports fixed input/output table counts, serves default-constructed input
+// schemas and never knows a table path.
+class TDummyInferenceContext
+    : public IOperationPreparationContext
+{
+public:
+    // Creates a context holding `inputCount` default-constructed input schemas.
+    TDummyInferenceContext(int inputCount, int outputCount)
+        : InputCount_(inputCount)
+        , OutputCount_(outputCount)
+        , InputSchemas_(inputCount)
+    { }
+
+    // Table counts, exactly as passed to the constructor.
+    int GetInputCount() const override { return InputCount_; }
+    int GetOutputCount() const override { return OutputCount_; }
+
+    // Input schemas; `index` is not range-checked.
+    const TVector<TTableSchema>& GetInputSchemas() const override { return InputSchemas_; }
+    const TTableSchema& GetInputSchema(int index) const override { return InputSchemas_[index]; }
+
+    // Paths are never available in this stub.
+    TMaybe<TYPath> GetInputPath(int /*index*/) const override { return Nothing(); }
+    TMaybe<TYPath> GetOutputPath(int /*index*/) const override { return Nothing(); }
+
+private:
+    int InputCount_;
+    int OutputCount_;
+    TVector<TTableSchema> InputSchemas_;
+};
+
+////////////////////////////////////////////////////////////////////
+
+// Assigns output schemas one by one, by index list and by index range, then
+// checks that re-assignment is rejected and every output got its schema.
+TEST(TPrepareOperationTest, BasicSchemas)
+{
+    // Three one-column schemas that are easy to tell apart.
+    auto uint64Schema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64));
+    auto booleanSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN));
+    auto stringSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING));
+
+    TDummyInferenceContext context(/*inputCount*/ 3, /*outputCount*/ 7);
+    TJobOperationPreparer preparer(context);
+
+    preparer
+        .OutputSchema(1, uint64Schema)
+        .BeginOutputGroup(TVector<int>{2, 5})
+            .Schema(booleanSchema)
+        .EndOutputGroup()
+        .BeginOutputGroup(3, 5)
+            .Schema(stringSchema)
+        .EndOutputGroup()
+        .BeginOutputGroup(TVector<int>{0, 6})
+            .Schema(stringSchema)
+        .EndOutputGroup();
+
+    // Outputs that already have a schema, and groups touching them, must be rejected.
+    EXPECT_THROW(preparer.OutputSchema(1, booleanSchema), TApiUsageError);
+    EXPECT_THROW(preparer.BeginOutputGroup(3, 5).Schema(booleanSchema), TApiUsageError);
+    EXPECT_THROW(preparer.BeginOutputGroup(TVector<int>{3,6,7}).Schema(booleanSchema), TApiUsageError);
+
+    preparer.Finish();
+    auto outputSchemas = preparer.GetOutputSchemas();
+
+    // The (3, 5) range group is expected to have covered outputs 3 and 4 only.
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[0], stringSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[1], uint64Schema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[2], booleanSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[3], stringSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[4], stringSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[5], booleanSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[6], stringSchema);
+}
+
+// An output explicitly marked with NoOutputSchema() must end up with an empty
+// schema and must not accept a schema afterwards.
+TEST(TPrepareOperationTest, NoSchema)
+{
+    auto columnSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64));
+
+    TDummyInferenceContext context(/*inputCount*/ 3, /*outputCount*/ 4);
+    TJobOperationPreparer preparer(context);
+
+    preparer
+        .OutputSchema(1, columnSchema)
+        .NoOutputSchema(0)
+        .BeginOutputGroup(2, 4)
+            .Schema(columnSchema)
+        .EndOutputGroup();
+
+    // Output 0 was declared schemaless above.
+    EXPECT_THROW(preparer.OutputSchema(0, columnSchema), TApiUsageError);
+
+    preparer.Finish();
+    auto outputSchemas = preparer.GetOutputSchemas();
+
+    EXPECT_TRUE(outputSchemas[0].Empty());
+
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[1], columnSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[2], columnSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[3], columnSchema);
+}
+
+// Describes inputs and outputs with protobuf row types and checks both the
+// schemas derived for the outputs and the stored table descriptions.
+TEST(TPrepareOperationTest, Descriptions)
+{
+    // Struct form of a URL row; reused inside the schemas below.
+    auto urlRowStruct = NTi::Struct({
+        {"Host", NTi::Optional(NTi::String())},
+        {"Path", NTi::Optional(NTi::String())},
+        {"HttpCode", NTi::Optional(NTi::Int32())},
+    });
+
+    // Schemas expected for outputs described with each row type.
+    auto urlRowSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String())))
+        .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String())))
+        .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32())));
+
+    auto rowFieldSerializationOptionSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String())));
+
+    auto rowSerializedRepeatedFieldsSchema = TTableSchema()
+        .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64())))
+        .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct)));
+
+    TDummyInferenceContext context(/*inputCount*/ 5, /*outputCount*/ 7);
+    TJobOperationPreparer preparer(context);
+
+    // Inputs: 0, 2, 3 are URL rows; 1 and 4 are rows with repeated fields.
+    preparer
+        .InputDescription<TUrlRow>(0)
+        .BeginInputGroup(2, 3)
+            .Description<TUrlRow>()
+        .EndInputGroup()
+        .BeginInputGroup(TVector<int>{1, 4})
+            .Description<TRowSerializedRepeatedFields>()
+        .EndInputGroup()
+        .InputDescription<TUrlRow>(3);
+
+    EXPECT_THROW(preparer.InputDescription<TUrlRow>(0), TApiUsageError);
+
+    // Outputs 0 and 5 are described with the second argument set to false.
+    preparer
+        .OutputDescription<TUrlRow>(0, false)
+        .OutputDescription<TRowFieldSerializationOption>(1)
+        .BeginOutputGroup(2, 4)
+            .Description<TUrlRow>()
+        .EndOutputGroup()
+        .BeginOutputGroup(TVector<int>{4,6})
+            .Description<TRowSerializedRepeatedFields>()
+        .EndOutputGroup()
+        .OutputDescription<TUrlRow>(5, false);
+
+    // A second description is always an error; an explicit schema is accepted
+    // only for the outputs that were described with `false` (0 and 5).
+    EXPECT_THROW(preparer.OutputDescription<TUrlRow>(0), TApiUsageError);
+    EXPECT_NO_THROW(preparer.OutputSchema(0, urlRowSchema));
+    EXPECT_NO_THROW(preparer.OutputSchema(5, urlRowSchema));
+    EXPECT_THROW(preparer.OutputSchema(1, urlRowSchema), TApiUsageError);
+
+    preparer.Finish();
+    auto outputSchemas = preparer.GetOutputSchemas();
+
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[0], urlRowSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[1], rowFieldSerializationOptionSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[2], urlRowSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[3], urlRowSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[4], rowSerializedRepeatedFieldsSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[5], urlRowSchema);
+    ASSERT_SERIALIZABLES_EQ(outputSchemas[6], rowSerializedRepeatedFieldsSchema);
+
+    auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+    };
+    EXPECT_EQ(expectedInputDescriptions, preparer.GetInputDescriptions());
+
+    auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}},
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+        {TProtobufTableStructure{TUrlRow::descriptor()}},
+        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+    };
+    EXPECT_EQ(expectedOutputDescriptions, preparer.GetOutputDescriptions());
+}
+
+// Sets column filters and renamings per input (directly and through a group)
+// and checks what the preparer reports for each of the five inputs.
+TEST(TPrepareOperationTest, InputColumns)
+{
+    TDummyInferenceContext context(/*inputCount*/ 5, /*outputCount*/ 1);
+    TJobOperationPreparer preparer(context);
+
+    preparer
+        .InputColumnFilter(2, {"a", "b"})
+        .BeginInputGroup(0, 2)
+            .ColumnFilter({"b", "c"})
+            .ColumnRenaming({{"b", "B"}, {"c", "C"}})
+        .EndInputGroup()
+        .InputColumnRenaming(3, {{"a", "AAA"}})
+        .NoOutputSchema(0);
+    preparer.Finish();
+
+    // Inputs without a filter are expected as an empty TMaybe.
+    auto expectedFilters = TVector<TMaybe<TVector<TString>>>{
+        {{"b", "c"}},
+        {{"b", "c"}},
+        {{"a", "b"}},
+        {},
+        {},
+    };
+
+    // Inputs without a renaming are expected as an empty map.
+    auto expectedRenamings = TVector<THashMap<TString, TString>>{
+        {{"b", "B"}, {"c", "C"}},
+        {{"b", "B"}, {"c", "C"}},
+        {},
+        {{"a", "AAA"}},
+        {},
+    };
+
+    EXPECT_EQ(preparer.GetInputColumnRenamings(), expectedRenamings);
+    EXPECT_EQ(preparer.GetInputColumnFilters(), expectedFilters);
+}
+
+// Regression test: describing all three inputs and the single output with a
+// protobuf row type must let Finish() complete. The test has no explicit
+// expectations; it passes as long as nothing throws.
+TEST(TPrepareOperationTest, Bug_r7349102)
+{
+    TDummyInferenceContext context(/*inputCount*/ 3, /*outputCount*/ 1);
+    TJobOperationPreparer builder(context);
+
+    builder
+        .InputDescription<TUrlRow>(0)
+        .InputDescription<TUrlRow>(1)
+        .InputDescription<TUrlRow>(2)
+        .OutputDescription<TUrlRow>(0);
+
+    builder.Finish();
+}
+
+////////////////////////////////////////////////////////////////////
diff --git a/yt/cpp/mapreduce/interface/ut/proto3_ut.proto b/yt/cpp/mapreduce/interface/ut/proto3_ut.proto
new file mode 100644
index 00000000000..b24c13085bd
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/proto3_ut.proto
@@ -0,0 +1,17 @@
+syntax = "proto3";
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NTestingProto3;
+
+option (NYT.file_default_field_flags) = SERIALIZATION_YT;
+
+// Proto3 message with a single explicitly `optional` scalar field.
+message TWithOptional
+{
+ optional int64 x = 1;
+}
+
+// Proto3 message whose only field is an `optional` embedded TWithOptional.
+message TWithOptionalMessage
+{
+ optional TWithOptional x = 1;
+}
diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp
new file mode 100644
index 00000000000..abfe5bbfdce
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp
@@ -0,0 +1,270 @@
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/format.h>
+
+#include <yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+using namespace NYT;
+
+namespace {
+
+// Struct type with optional Host / Path / HttpCode members; returned as is
+// when `required` is true and wrapped into Optional otherwise.
+NTi::TTypePtr GetUrlRowType(bool required)
+{
+    static const NTi::TTypePtr structType = NTi::Struct({
+        {"Host", ToTypeV3(EValueType::VT_STRING, false)},
+        {"Path", ToTypeV3(EValueType::VT_STRING, false)},
+        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}});
+
+    if (required) {
+        return structType;
+    }
+    return NTi::TTypePtr(NTi::Optional(structType));
+}
+
+} // namespace
+
+// UrlRow_1 is flagged SERIALIZATION_PROTOBUF and is expected as a string column;
+// UrlRow_2 has no flag of its own and is expected as a struct column.
+TEST(TProtobufFileOptionsTest, TRowFieldSerializationOption)
+{
+    const auto schema = CreateTableSchema<NTestingFileOptions::TRowFieldSerializationOption>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// The message sets SERIALIZATION_PROTOBUF as its default and overrides it with
+// SERIALIZATION_YT on UrlRow_2 only: expected columns are string and struct.
+TEST(TProtobufFileOptionsTest, TRowMixedSerializationOptions)
+{
+    const auto schema = CreateTableSchema<NTestingFileOptions::TRowMixedSerializationOptions>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Struct member order: declaration order by default and with the explicit
+// "as in proto file" flag, field-number order with SORT_FIELDS_BY_FIELD_NUMBER.
+TEST(TProtobufFileOptionsTest, FieldSortOrder)
+{
+    const auto schema = CreateTableSchema<NTestingFileOptions::TFieldSortOrder>();
+
+    // x, y, z: the order the fields are written in the .proto file.
+    auto declarationOrder = NTi::Optional(NTi::Struct({
+        {"x", NTi::Optional(NTi::Int64())},
+        {"y", NTi::Optional(NTi::String())},
+        {"z", NTi::Optional(NTi::Bool())},
+    }));
+    // z, x, y: ascending field numbers (1, 2, 12).
+    auto fieldNumberOrder = NTi::Optional(NTi::Struct({
+        {"z", NTi::Optional(NTi::Bool())},
+        {"x", NTi::Optional(NTi::Int64())},
+        {"y", NTi::Optional(NTi::String())},
+    }));
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(declarationOrder))
+        .AddColumn(TColumnSchema().Name("EmbeddedAsInProtoFile").Type(declarationOrder))
+        .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(fieldNumberOrder));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// MapDefault is expected as a list of {key, value} structs, MapDict (flagged
+// MAP_AS_DICT) as a dict.
+TEST(TProtobufFileOptionsTest, Map)
+{
+    const auto schema = CreateTableSchema<NTestingFileOptions::TWithMap>();
+
+    // Value type shared by both maps.
+    auto embedded = NTi::Struct({
+        {"x", NTi::Optional(NTi::Int64())},
+        {"y", NTi::Optional(NTi::String())},
+    });
+
+    auto keyValueList = NTi::List(NTi::Struct({
+        {"key", NTi::Optional(NTi::Int64())},
+        {"value", NTi::Optional(embedded)},
+    }));
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema()
+            .Name("MapDefault")
+            .Type(keyValueList))
+        .AddColumn(TColumnSchema()
+            .Name("MapDict")
+            .Type(NTi::Dict(NTi::Int64(), embedded)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Checks how oneof groups of TWithOneof are expected to show up in the schema:
+// as a variant member or as separate optional members.
+TEST(TProtobufFileOptionsTest, Oneof)
+{
+    const auto schema = CreateTableSchema<NTestingFileOptions::TWithOneof>();
+
+    auto embedded = NTi::Struct({
+        {"x", NTi::Optional(NTi::Int64())},
+        {"y", NTi::Optional(NTi::String())},
+    });
+
+    // TDefaultVariant: Oneof2 becomes a variant, Oneof1 is spread into members.
+    auto defaultVariantType = NTi::Optional(NTi::Struct({
+        {"field", NTi::Optional(NTi::String())},
+        {"Oneof2", NTi::Optional(NTi::Variant(NTi::Struct({
+            {"y2", NTi::String()},
+            {"z2", embedded},
+            {"x2", NTi::Int64()},
+        })))},
+        {"x1", NTi::Optional(NTi::Int64())},
+        {"y1", NTi::Optional(NTi::String())},
+        {"z1", NTi::Optional(embedded)},
+    }));
+
+    // TNoDefault: both oneofs are spread into optional members.
+    auto noDefaultType = NTi::Optional(NTi::Struct({
+        {"field", NTi::Optional(NTi::String())},
+        {"y2", NTi::Optional(NTi::String())},
+        {"z2", NTi::Optional(embedded)},
+        {"x2", NTi::Optional(NTi::Int64())},
+        {"x1", NTi::Optional(NTi::Int64())},
+        {"y1", NTi::Optional(NTi::String())},
+        {"z1", NTi::Optional(embedded)},
+    }));
+
+    // TSerializationProtobuf: the embedded member z1 is expected as a string.
+    auto serializationProtobufType = NTi::Optional(NTi::Struct({
+        {"x1", NTi::Optional(NTi::Int64())},
+        {"y1", NTi::Optional(NTi::String())},
+        {"z1", NTi::Optional(NTi::String())},
+    }));
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema()
+            .Name("DefaultVariant")
+            .Type(defaultVariantType))
+        .AddColumn(TColumnSchema()
+            .Name("NoDefault")
+            .Type(noDefaultType))
+        .AddColumn(TColumnSchema()
+            .Name("SerializationProtobuf")
+            .Type(serializationProtobufType))
+        .AddColumn(TColumnSchema()
+            .Name("MemberOfTopLevelOneof")
+            .Type(NTi::Optional(NTi::Int64())));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Returns a copy of the "columns" node of table `tableIndex` taken from the
+// "tables" attribute of the format config.
+static TNode GetColumns(const TFormat& format, int tableIndex = 0)
+{
+    const auto& tables = format.Config.GetAttributes()["tables"];
+    return tables[tableIndex]["columns"];
+}
+
+// Format description: UrlRow_1 is expected as an opaque "message", UrlRow_2 as
+// a "structured_message" listing the three TUrlRow fields.
+TEST(TProtobufFormatFileOptionsTest, TRowFieldSerializationOption)
+{
+    const auto format = TFormat::Protobuf<NTestingFileOptions::TRowFieldSerializationOption>();
+    auto columns = GetColumns(format);
+
+    {
+        auto& column = columns[0];
+        EXPECT_EQ(column["name"], "UrlRow_1");
+        EXPECT_EQ(column["proto_type"], "message");
+        EXPECT_EQ(column["field_number"], 1);
+    }
+
+    {
+        auto& column = columns[1];
+        EXPECT_EQ(column["name"], "UrlRow_2");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        EXPECT_EQ(column["field_number"], 2);
+
+        const auto& fields = column["fields"];
+
+        EXPECT_EQ(fields[0]["name"], "Host");
+        EXPECT_EQ(fields[0]["proto_type"], "string");
+        EXPECT_EQ(fields[0]["field_number"], 1);
+
+        EXPECT_EQ(fields[1]["name"], "Path");
+        EXPECT_EQ(fields[1]["proto_type"], "string");
+        EXPECT_EQ(fields[1]["field_number"], 2);
+
+        EXPECT_EQ(fields[2]["name"], "HttpCode");
+        EXPECT_EQ(fields[2]["proto_type"], "sint32");
+        EXPECT_EQ(fields[2]["field_number"], 3);
+    }
+}
+
+// Both map columns are expected to have the same format description: a
+// structured message with an int64 field and a structured-message field.
+TEST(TProtobufFormatFileOptionsTest, Map)
+{
+    const auto format = TFormat::Protobuf<NTestingFileOptions::TWithMap>();
+    auto columns = GetColumns(format);
+
+    EXPECT_EQ(columns.Size(), 2u);
+
+    const char* const expectedNames[] = {"MapDefault", "MapDict"};
+    for (size_t index = 0; index < 2; ++index) {
+        const auto& column = columns[index];
+        EXPECT_EQ(column["name"], expectedNames[index]);
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        EXPECT_EQ(column["fields"].Size(), 2u);
+        EXPECT_EQ(column["fields"][0]["proto_type"], "int64");
+        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message");
+    }
+}
+
+// Format description of TWithOneof: one scoped section per top-level column.
+TEST(TProtobufFormatFileOptionsTest, Oneof)
+{
+    const auto format = TFormat::Protobuf<NTestingFileOptions::TWithOneof>();
+    auto columns = GetColumns(format);
+
+    EXPECT_EQ(columns.Size(), 4u);
+
+    // DefaultVariant: Oneof2 is a nested "oneof" entry, Oneof1 members are listed inline.
+    {
+        const auto& column = columns[0];
+        EXPECT_EQ(column["name"], "DefaultVariant");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        EXPECT_EQ(column["fields"].Size(), 5u);
+        EXPECT_EQ(column["fields"][0]["name"], "field");
+
+        const auto& oneof2 = column["fields"][1];
+        EXPECT_EQ(oneof2["name"], "Oneof2");
+        EXPECT_EQ(oneof2["proto_type"], "oneof");
+        EXPECT_EQ(oneof2["fields"][0]["name"], "y2");
+        EXPECT_EQ(oneof2["fields"][1]["name"], "z2");
+        EXPECT_EQ(oneof2["fields"][1]["proto_type"], "structured_message");
+        const auto& embeddedFields = oneof2["fields"][1]["fields"];
+        EXPECT_EQ(embeddedFields[0]["name"], "x");
+        EXPECT_EQ(embeddedFields[1]["name"], "y");
+
+        EXPECT_EQ(oneof2["fields"][2]["name"], "x2");
+
+        EXPECT_EQ(column["fields"][2]["name"], "x1");
+        EXPECT_EQ(column["fields"][3]["name"], "y1");
+        EXPECT_EQ(column["fields"][4]["name"], "z1");
+    }
+
+    // NoDefault: members of both oneofs are listed inline, seven fields in total.
+    {
+        const auto& column = columns[1];
+        EXPECT_EQ(column["name"], "NoDefault");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        const auto& fields = column["fields"];
+        EXPECT_EQ(fields.Size(), 7u);
+
+        EXPECT_EQ(fields[0]["name"], "field");
+
+        EXPECT_EQ(fields[1]["name"], "y2");
+
+        EXPECT_EQ(fields[2]["name"], "z2");
+        EXPECT_EQ(fields[2]["proto_type"], "structured_message");
+        const auto& embeddedFields = fields[2]["fields"];
+        EXPECT_EQ(embeddedFields[0]["name"], "x");
+        EXPECT_EQ(embeddedFields[1]["name"], "y");
+
+        EXPECT_EQ(fields[3]["name"], "x2");
+
+        EXPECT_EQ(fields[4]["name"], "x1");
+        EXPECT_EQ(fields[5]["name"], "y1");
+        EXPECT_EQ(fields[6]["name"], "z1");
+    }
+
+    // SerializationProtobuf: three inline fields.
+    {
+        const auto& column = columns[2];
+        EXPECT_EQ(column["name"], "SerializationProtobuf");
+        EXPECT_EQ(column["proto_type"], "structured_message");
+        EXPECT_EQ(column["fields"].Size(), 3u);
+        EXPECT_EQ(column["fields"][0]["name"], "x1");
+        EXPECT_EQ(column["fields"][1]["name"], "y1");
+        EXPECT_EQ(column["fields"][2]["name"], "z1");
+    }
+
+    // The only member of the top-level oneof is a plain column.
+    {
+        const auto& column = columns[3];
+        EXPECT_EQ(column["name"], "MemberOfTopLevelOneof");
+        EXPECT_EQ(column["proto_type"], "int64");
+    }
+}
diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto
new file mode 100644
index 00000000000..4804b2f60c1
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto
@@ -0,0 +1,142 @@
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NTestingFileOptions;
+
+option (NYT.file_default_field_flags) = SERIALIZATION_YT;
+option (NYT.file_default_field_flags) = MAP_AS_LIST_OF_STRUCTS;
+option (NYT.file_default_message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+option (NYT.file_default_oneof_flags) = SEPARATE_FIELDS;
+
+// URL row with three optional fields; every column_name option repeats the
+// field's own name.
+message TUrlRow
+{
+ optional string Host = 1 [(NYT.column_name) = "Host"];
+ optional string Path = 2 [(NYT.column_name) = "Path"];
+ optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"];
+}
+
+// Two embedded TUrlRow fields: UrlRow_1 is flagged SERIALIZATION_PROTOBUF
+// explicitly, UrlRow_2 carries no flag of its own.
+message TRowFieldSerializationOption
+{
+ optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+// Sets SERIALIZATION_PROTOBUF as the message-level default field flag and
+// overrides it with SERIALIZATION_YT on UrlRow_2 only.
+message TRowMixedSerializationOptions
+{
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_YT];
+}
+
+// One repeated scalar field and one repeated embedded-message field, neither
+// with flags of its own.
+message TRowSerializedRepeatedFields
+{
+ repeated int64 Ints = 1;
+ repeated TUrlRow UrlRows = 2;
+}
+
+// Three embedded messages with the same fields, declared in the order x, y, z
+// with field numbers 2, 12, 1, so that declaration order and field-number
+// order differ. They differ only in their message_flags option.
+message TFieldSortOrder
+{
+ // No message_flags of its own.
+ message TEmbeddedDefault {
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ // Explicitly requests the "as in proto file" field order.
+ message TEmbeddedAsInProtoFile {
+ option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ // Explicitly requests ordering by field number.
+ message TEmbeddedByFieldNumber {
+ option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TEmbeddedDefault EmbeddedDefault = 1;
+ optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2;
+ optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3;
+}
+
+// Two maps with the same key and value types: MapDefault has no flag of its
+// own, MapDict is flagged MAP_AS_DICT.
+message TWithMap
+{
+ // Value type of both maps.
+ message TEmbedded {
+ optional int64 x = 1;
+ optional string y = 2;
+ }
+
+ map<int64, TEmbedded> MapDefault = 1;
+ map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT];
+}
+
+// Oneof fixtures: nested messages combine oneof groups with different
+// oneof / field flags, plus one oneof declared directly in the top-level message.
+message TWithOneof
+{
+ // Embedded message whose two fields both live inside a oneof.
+ message TEmbedded
+ {
+ oneof Oneof {
+ int64 x = 1;
+ string y = 2;
+ }
+ }
+
+ // Message-level default oneof flag is VARIANT; Oneof1 overrides it with
+ // SEPARATE_FIELDS, Oneof2 has no flag of its own.
+ message TDefaultVariant
+ {
+ option (NYT.default_oneof_flags) = VARIANT;
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ option (NYT.oneof_flags) = SEPARATE_FIELDS;
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ // Same fields as TDefaultVariant but without any oneof options.
+ message TNoDefault
+ {
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ // Single oneof in a message whose default field flag is SERIALIZATION_PROTOBUF.
+ message TSerializationProtobuf
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ oneof Oneof
+ {
+ int64 x1 = 2;
+ string y1 = 1;
+ TEmbedded z1 = 3;
+ }
+ }
+
+ optional TDefaultVariant DefaultVariant = 1;
+ optional TNoDefault NoDefault = 2;
+ optional TSerializationProtobuf SerializationProtobuf = 3;
+
+ // Oneof declared directly in the top-level message, with a single member.
+ oneof TopLevelOneof
+ {
+ int64 MemberOfTopLevelOneof = 4;
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp
new file mode 100644
index 00000000000..d7bee1e6d20
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp
@@ -0,0 +1,444 @@
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+
+#include <yt/cpp/mapreduce/interface/ut/proto3_ut.pb.h>
+#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <util/generic/fwd.h>
+
+#include <algorithm>
+
+using namespace NYT;
+
+// Tells whether `schema` contains a column whose name equals `name`.
+bool IsFieldPresent(const TTableSchema& schema, TStringBuf name)
+{
+    const auto& columns = schema.Columns();
+    return std::any_of(columns.begin(), columns.end(), [&] (const auto& column) {
+        return column.Name() == name;
+    });
+}
+
+// Scalar fields of TIntegral are all expected as optional columns; the float
+// field is expected as a double and the enum field as a string.
+TEST(TProtoSchemaSimpleTest, TIntegral)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TIntegral>();
+
+    // Non-required column of the given simple type.
+    auto optionalColumn = [] (const TString& name, EValueType type) {
+        return TColumnSchema().Name(name).Type(ToTypeV3(type, false));
+    };
+
+    const auto expected = TTableSchema()
+        .AddColumn(optionalColumn("DoubleField", EValueType::VT_DOUBLE))
+        .AddColumn(optionalColumn("FloatField", EValueType::VT_DOUBLE))
+        .AddColumn(optionalColumn("Int32Field", EValueType::VT_INT32))
+        .AddColumn(optionalColumn("Int64Field", EValueType::VT_INT64))
+        .AddColumn(optionalColumn("Uint32Field", EValueType::VT_UINT32))
+        .AddColumn(optionalColumn("Uint64Field", EValueType::VT_UINT64))
+        .AddColumn(optionalColumn("Sint32Field", EValueType::VT_INT32))
+        .AddColumn(optionalColumn("Sint64Field", EValueType::VT_INT64))
+        .AddColumn(optionalColumn("Fixed32Field", EValueType::VT_UINT32))
+        .AddColumn(optionalColumn("Fixed64Field", EValueType::VT_UINT64))
+        .AddColumn(optionalColumn("Sfixed32Field", EValueType::VT_INT32))
+        .AddColumn(optionalColumn("Sfixed64Field", EValueType::VT_INT64))
+        .AddColumn(optionalColumn("BoolField", EValueType::VT_BOOLEAN))
+        .AddColumn(optionalColumn("EnumField", EValueType::VT_STRING));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// TOneOf is expected to yield three optional scalar columns.
+TEST(TProtoSchemaSimpleTest, TOneOf)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TOneOf>();
+
+    // Non-required column of the given simple type.
+    auto optionalColumn = [] (const TString& name, EValueType type) {
+        return TColumnSchema().Name(name).Type(ToTypeV3(type, false));
+    };
+
+    const auto expected = TTableSchema()
+        .AddColumn(optionalColumn("DoubleField", EValueType::VT_DOUBLE))
+        .AddColumn(optionalColumn("Int32Field", EValueType::VT_INT32))
+        .AddColumn(optionalColumn("BoolField", EValueType::VT_BOOLEAN));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// The required flag passed to ToTypeV3 is expected to differ between the two columns.
+TEST(TProtoSchemaSimpleTest, TWithRequired)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TWithRequired>();
+
+    const auto requiredString = ToTypeV3(EValueType::VT_STRING, true);
+    const auto optionalString = ToTypeV3(EValueType::VT_STRING, false);
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("RequiredField").Type(requiredString))
+        .AddColumn(TColumnSchema().Name("NotRequiredField").Type(optionalString));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// All six fields of TAggregated, including the nested ones, are expected as
+// optional string columns.
+TEST(TProtoSchemaSimpleTest, TAggregated)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TAggregated>();
+
+    EXPECT_EQ(6u, schema.Columns().size());
+
+    auto optionalStringColumn = [] (const TString& name) {
+        return TColumnSchema().Name(name).Type(ToTypeV3(EValueType::VT_STRING, false));
+    };
+
+    const auto expected = TTableSchema()
+        .AddColumn(optionalStringColumn("StringField"))
+        .AddColumn(optionalStringColumn("BytesField"))
+        .AddColumn(optionalStringColumn("NestedField"))
+        .AddColumn(optionalStringColumn("NestedRepeatedField"))
+        .AddColumn(optionalStringColumn("NestedOneOfField"))
+        .AddColumn(optionalStringColumn("NestedRecursiveField"));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Without sort columns TAliased is expected as key, subkey, Data.
+TEST(TProtoSchemaSimpleTest, TAliased)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TAliased>();
+
+    const auto keyColumn = TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false));
+    const auto subkeyColumn = TColumnSchema().Name("subkey").Type(ToTypeV3(EValueType::VT_DOUBLE, false));
+    const auto dataColumn = TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false));
+
+    const auto expected = TTableSchema()
+        .AddColumn(keyColumn)
+        .AddColumn(subkeyColumn)
+        .AddColumn(dataColumn);
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Columns named as sort columns are expected with ascending sort order.
+TEST(TProtoSchemaSimpleTest, SortColumns)
+{
+    const TSortColumns keys = {"key", "subkey"};
+
+    const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys);
+
+    const auto sortedKey = TColumnSchema()
+        .Name("key")
+        .Type(ToTypeV3(EValueType::VT_INT32, false))
+        .SortOrder(ESortOrder::SO_ASCENDING);
+    const auto sortedSubkey = TColumnSchema()
+        .Name("subkey")
+        .Type(ToTypeV3(EValueType::VT_DOUBLE, false))
+        .SortOrder(ESortOrder::SO_ASCENDING);
+    const auto data = TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false));
+
+    const auto expected = TTableSchema()
+        .AddColumn(sortedKey)
+        .AddColumn(sortedSubkey)
+        .AddColumn(data);
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Sorting by "subkey" only: that column is expected first, ahead of "key".
+TEST(TProtoSchemaSimpleTest, SortColumnsReordered)
+{
+    const TSortColumns keys = {"subkey"};
+
+    const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys);
+
+    const auto sortedSubkey = TColumnSchema()
+        .Name("subkey")
+        .Type(ToTypeV3(EValueType::VT_DOUBLE, false))
+        .SortOrder(ESortOrder::SO_ASCENDING);
+    const auto key = TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false));
+    const auto data = TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false));
+
+    const auto expected = TTableSchema()
+        .AddColumn(sortedSubkey)
+        .AddColumn(key)
+        .AddColumn(data);
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Invalid sort column lists must make schema creation throw.
+TEST(TProtoSchemaSimpleTest, SortColumnsInvalid)
+{
+    // The same column listed twice.
+    EXPECT_THROW(
+        CreateTableSchema<NUnitTesting::TAliased>({"subkey", "subkey"}),
+        yexception);
+
+    // "junk" is not among the columns the other TAliased tests expect.
+    EXPECT_THROW(
+        CreateTableSchema<NUnitTesting::TAliased>({"key", "junk"}),
+        yexception);
+}
+
+// With the second argument set to true all three columns are expected.
+TEST(TProtoSchemaSimpleTest, KeepFieldsWithoutExtensionTrue)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, true);
+
+    for (const char* name : {"key", "subkey", "Data"}) {
+        EXPECT_TRUE(IsFieldPresent(schema, name));
+    }
+    EXPECT_TRUE(schema.Strict());
+}
+
+// With the second argument set to false the "Data" column is expected to be dropped.
+TEST(TProtoSchemaSimpleTest, KeepFieldsWithoutExtensionFalse)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, false);
+
+    for (const char* name : {"key", "subkey"}) {
+        EXPECT_TRUE(IsFieldPresent(schema, name));
+    }
+    EXPECT_FALSE(IsFieldPresent(schema, "Data"));
+    EXPECT_TRUE(schema.Strict());
+}
+
+// TWithTypeOptions is expected to produce a non-strict schema with int, string
+// and any columns, an embedded struct repeating them, and a list of ints.
+TEST(TProtoSchemaSimpleTest, ProtobufTypeOption)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TWithTypeOptions>({});
+
+    const auto optionalInt64 = ToTypeV3(EValueType::VT_INT64, false);
+    const auto optionalString = ToTypeV3(EValueType::VT_STRING, false);
+    const auto optionalAny = ToTypeV3(EValueType::VT_ANY, false);
+
+    const auto embeddedType = NTi::Optional(NTi::Struct({
+        {"ColorIntField", optionalInt64},
+        {"ColorStringField", optionalString},
+        {"AnyField", optionalAny}}));
+
+    const auto expected = TTableSchema()
+        .Strict(false)
+        .AddColumn(TColumnSchema().Name("ColorIntField").Type(optionalInt64))
+        .AddColumn(TColumnSchema().Name("ColorStringField").Type(optionalString))
+        .AddColumn(TColumnSchema().Name("AnyField").Type(optionalAny))
+        .AddColumn(TColumnSchema().Name("EmbeddedField").Type(embeddedType))
+        .AddColumn(TColumnSchema().Name("RepeatedEnumIntField").Type(NTi::List(NTi::Int64())));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Each of the four *_TypeMismatch_* messages must be rejected with yexception.
+TEST(TProtoSchemaSimpleTest, ProtobufTypeOption_TypeMismatch)
+{
+    EXPECT_THROW(CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumInt>({}), yexception);
+    EXPECT_THROW(CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumString>({}), yexception);
+    EXPECT_THROW(CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_Any>({}), yexception);
+    EXPECT_THROW(CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_OtherColumns>({}), yexception);
+}
+
+// Struct type with optional Host_ColumnName / Path_KeyColumnName / HttpCode
+// members; returned as is when `required` is true, wrapped into Optional otherwise.
+NTi::TTypePtr GetUrlRowType_ColumnNames(bool required)
+{
+    static const NTi::TTypePtr type = NTi::Struct({
+        {"Host_ColumnName", ToTypeV3(EValueType::VT_STRING, false)},
+        {"Path_KeyColumnName", ToTypeV3(EValueType::VT_STRING, false)},
+        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)},
+    });
+
+    if (required) {
+        return type;
+    }
+    return NTi::TTypePtr(NTi::Optional(type));
+}
+
+// TRepeated must be rejected, while TRepeatedYtMode is expected to yield a
+// list-of-int32 column.
+TEST(TProtoSchemaComplexTest, TRepeated)
+{
+    EXPECT_THROW(CreateTableSchema<NUnitTesting::TRepeated>(), yexception);
+
+    const auto schema = CreateTableSchema<NUnitTesting::TRepeatedYtMode>();
+
+    // List items are built with the required flag set.
+    const auto int32List = NTi::List(ToTypeV3(EValueType::VT_INT32, true));
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("Int32Field").Type(int32List));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// TOptionalList is expected to yield a single optional list-of-int64 column.
+TEST(TProtoSchemaComplexTest, TRepeatedOptionalList)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TOptionalList>();
+
+    auto optionalInt64List = NTi::Optional(NTi::List(NTi::Int64()));
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("OptionalListInt64").TypeV3(optionalInt64List));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Struct type with optional Host / Path / HttpCode members; returned as is
+// when `required` is true and wrapped into Optional otherwise.
+NTi::TTypePtr GetUrlRowType(bool required)
+{
+    static const NTi::TTypePtr structType = NTi::Struct({
+        {"Host", ToTypeV3(EValueType::VT_STRING, false)},
+        {"Path", ToTypeV3(EValueType::VT_STRING, false)},
+        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}});
+
+    if (required) {
+        return structType;
+    }
+    return NTi::TTypePtr(NTi::Optional(structType));
+}
+
+// UrlRow_1 is expected as a struct column and UrlRow_2 as a string column.
+TEST(TProtoSchemaComplexTest, TRowFieldSerializationOption)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TRowFieldSerializationOption>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Both embedded rows are expected as struct columns.
+TEST(TProtoSchemaComplexTest, TRowMessageSerializationOption)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TRowMessageSerializationOption>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// UrlRow_1 is expected as a struct column and UrlRow_2 as a string column.
+TEST(TProtoSchemaComplexTest, TRowMixedSerializationOptions)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Same as TRowMixedSerializationOptions, but the struct members are expected
+// under the names produced by GetUrlRowType_ColumnNames().
+TEST(TProtoSchemaComplexTest, TRowMixedSerializationOptions_ColumnNames)
+{
+    const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions_ColumnNames>();
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType_ColumnNames(false)))
+        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+// Nine columns: only the *_YtOption ones are expected as structs, and only
+// EmbeddedYt_YtOption keeps a struct one level deeper.
+TEST(TProtoSchemaComplexTest, NoOptionInheritance)
+{
+    auto deepestEmbedded = NTi::Optional(NTi::Struct({{"x", ToTypeV3(EValueType::VT_INT64, false)}}));
+
+    const auto schema = CreateTableSchema<NUnitTesting::TNoOptionInheritance>();
+
+    // Column expected as an opaque optional string.
+    auto stringColumn = [] (const TString& name) {
+        return TColumnSchema().Name(name).Type(ToTypeV3(EValueType::VT_STRING, false));
+    };
+
+    // Struct whose single "embedded" member is itself an opaque optional string.
+    auto structOfString = NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}}));
+
+    const auto expected = TTableSchema()
+        .AddColumn(TColumnSchema()
+            .Name("EmbeddedYt_YtOption")
+            .Type(NTi::Optional(NTi::Struct({{"embedded", deepestEmbedded}}))))
+        .AddColumn(stringColumn("EmbeddedYt_ProtobufOption"))
+        .AddColumn(stringColumn("EmbeddedYt_NoOption"))
+        .AddColumn(TColumnSchema()
+            .Name("EmbeddedProtobuf_YtOption")
+            .Type(structOfString))
+        .AddColumn(stringColumn("EmbeddedProtobuf_ProtobufOption"))
+        .AddColumn(stringColumn("EmbeddedProtobuf_NoOption"))
+        .AddColumn(TColumnSchema()
+            .Name("Embedded_YtOption")
+            .Type(structOfString))
+        .AddColumn(stringColumn("Embedded_ProtobufOption"))
+        .AddColumn(stringColumn("Embedded_NoOption"));
+
+    ASSERT_SERIALIZABLES_EQ(schema, expected);
+}
+
+TEST(TProtoSchemaComplexTest, Cyclic) // Schema creation must throw for message types that reference each other in a cycle under SERIALIZATION_YT.
+{
+ EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic>(), TApiUsageError); // every entry point into the TA -> TB/TC -> TD -> TA cycle is checked
+ EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TA>(), TApiUsageError);
+ EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TB>(), TApiUsageError);
+ EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TC>(), TApiUsageError);
+ EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TD>(), TApiUsageError);
+
+ ASSERT_SERIALIZABLES_EQ( // note: expected value comes first here, unlike the other tests in this file
+ TTableSchema().AddColumn(
+ TColumnSchema().Name("d").TypeV3(NTi::Optional(NTi::String()))), // TE.d is flagged SERIALIZATION_PROTOBUF, so it is a plain string column and no error is expected
+ CreateTableSchema<NUnitTesting::TCyclic::TE>());
+}
+
+TEST(TProtoSchemaComplexTest, FieldSortOrder) // Checks the order of struct members under each message_flags sort option.
+{
+ const auto schema = CreateTableSchema<NUnitTesting::TFieldSortOrder>();
+
+ auto byFieldNumber = NTi::Optional(NTi::Struct({ // members ordered by proto field number: z = 1, x = 2, y = 12
+ {"z", NTi::Optional(NTi::Bool())},
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ }));
+
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(byFieldNumber)) // no message_flags: expected to match field-number order
+ .AddColumn(TColumnSchema()
+ .Name("EmbeddedAsInProtoFile")
+ .Type(NTi::Optional(NTi::Struct({ // DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE: declaration order x, y, z
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ {"z", NTi::Optional(NTi::Bool())},
+ }))))
+ .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); // SORT_FIELDS_BY_FIELD_NUMBER set explicitly
+}
+
+TEST(TProtoSchemaComplexTest, Map) // Checks the column type produced for a protobuf map field under each MAP_AS_* flag.
+{
+ const auto schema = CreateTableSchema<NUnitTesting::TWithMap>();
+
+ auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { // builds List<Struct<key: Optional<key>, value: Optional<value>>>
+ return NTi::List(NTi::Struct({
+ {"key", NTi::Optional(key)},
+ {"value", NTi::Optional(value)},
+ }));
+ };
+
+ auto embedded = NTi::Struct({ // struct form of TWithMap.TEmbedded
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ });
+
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("MapDefault")
+ .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) // no flag: same expectation as the legacy mode, message value as a string
+ .AddColumn(TColumnSchema()
+ .Name("MapListOfStructsLegacy")
+ .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) // MAP_AS_LIST_OF_STRUCTS_LEGACY: message value as a string
+ .AddColumn(TColumnSchema()
+ .Name("MapListOfStructs")
+ .Type(createKeyValueStruct(NTi::Int64(), embedded))) // MAP_AS_LIST_OF_STRUCTS: message value as a struct
+ .AddColumn(TColumnSchema()
+ .Name("MapOptionalDict")
+ .Type(NTi::Optional(NTi::Dict(NTi::Int64(), embedded)))) // MAP_AS_OPTIONAL_DICT
+ .AddColumn(TColumnSchema()
+ .Name("MapDict")
+ .Type(NTi::Dict(NTi::Int64(), embedded)))); // MAP_AS_DICT
+}
+
+TEST(TProtoSchemaComplexTest, Oneof) // Checks how oneof groups map to variant columns or to separate optional fields.
+{
+ const auto schema = CreateTableSchema<NUnitTesting::TWithOneof>();
+
+ auto embedded = NTi::Struct({ // struct form of TWithOneof.TEmbedded: its oneof becomes one optional variant member
+ {"Oneof", NTi::Optional(NTi::Variant(NTi::Struct({
+ {"x", NTi::Int64()},
+ {"y", NTi::String()},
+ })))},
+ });
+
+ auto createType = [&] (TString oneof2Name) { // shared shape of TDefaultSeparateFields and TNoDefault; only the variant member's name differs
+ return NTi::Optional(NTi::Struct({
+ {"field", NTi::Optional(NTi::String())},
+ {oneof2Name, NTi::Optional(NTi::Variant(NTi::Struct({ // Oneof2 as a variant; alternatives listed in field-number order (2, 4, 6)
+ {"x2", NTi::Int64()},
+ {"y2", NTi::String()},
+ {"z2", embedded},
+ })))},
+ {"y1", NTi::Optional(NTi::String())}, // Oneof1 members as separate optional fields (field numbers 3, 5, 10)
+ {"z1", NTi::Optional(embedded)},
+ {"x1", NTi::Optional(NTi::Int64())},
+ }));
+ };
+
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("DefaultSeparateFields")
+ .Type(createType("variant_field_name"))) // name comes from the (NYT.variant_field_name) option on Oneof2
+ .AddColumn(TColumnSchema()
+ .Name("NoDefault")
+ .Type(createType("Oneof2"))) // without the option the variant member is named after the oneof
+ .AddColumn(TColumnSchema()
+ .Name("SerializationProtobuf")
+ .Type(NTi::Optional(NTi::Struct({ // TSerializationProtobuf sets no YT default: plain optional members, z1 as a string
+ {"y1", NTi::Optional(NTi::String())},
+ {"x1", NTi::Optional(NTi::Int64())},
+ {"z1", NTi::Optional(NTi::String())},
+ }))))
+ .AddColumn(TColumnSchema()
+ .Name("TopLevelOneof") // a oneof declared directly in the row message becomes a column named after the oneof
+ .Type(
+ NTi::Optional(
+ NTi::Variant(NTi::Struct({
+ {"MemberOfTopLevelOneof", NTi::Int64()}
+ }))
+ )
+ ))
+ );
+}
+
+TEST(TProtoSchemaComplexTest, Embedded) // Fields of messages flagged EMBEDDED are expected as top-level columns of the enclosing row.
+{
+ const auto schema = CreateTableSchema<NUnitTesting::TEmbeddingMessage>();
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .Strict(false) // NOTE(review): presumably non-strict because TEmbeddingMessage has an OTHER_COLUMNS field - confirm
+ .AddColumn(TColumnSchema().Name("embedded2_num").Type(NTi::Optional(NTi::Uint64()))) // from TEmbedded2Message, embedded via t1.t2
+ .AddColumn(TColumnSchema().Name("embedded2_struct").Type(NTi::Optional(NTi::Struct({
+ {"float1", NTi::Optional(NTi::Double())}, // proto float is expected as Double
+ {"string1", NTi::Optional(NTi::String())},
+ }))))
+ .AddColumn(TColumnSchema().Name("embedded2_repeated").Type(NTi::List(NTi::String())))
+ .AddColumn(TColumnSchema().Name("embedded_num").Type(NTi::Optional(NTi::Uint64()))) // from TEmbedded1Message, embedded via t1
+ .AddColumn(TColumnSchema().Name("embedded_extra_field").Type(NTi::Optional(NTi::String())))
+ .AddColumn(TColumnSchema().Name("variant").Type(NTi::Optional(NTi::Variant(NTi::Struct({ // TEmbedded1Message's oneof, named after the oneof
+ {"str_variant", NTi::String()},
+ {"uint_variant", NTi::Uint64()},
+ })))))
+ .AddColumn(TColumnSchema().Name("num").Type(NTi::Optional(NTi::Uint64()))) // TEmbeddingMessage's own fields come last
+ .AddColumn(TColumnSchema().Name("extra_field").Type(NTi::Optional(NTi::String())))
+ );
+}
+
+TEST(TProtoSchemaProto3Test, TWithOptional) // Expects a single optional int64 column "x" for NTestingProto3::TWithOptional (message defined in another .proto file).
+{
+ const auto schema = CreateTableSchema<NTestingProto3::TWithOptional>();
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("x").Type(NTi::Optional(NTi::Int64()))
+ )
+ );
+}
+
+TEST(TProtoSchemaProto3Test, TWithOptionalMessage) // Expects column "x" to be an optional struct holding one optional int64 member "x" (message defined in another .proto file).
+{
+ const auto schema = CreateTableSchema<NTestingProto3::TWithOptionalMessage>();
+ ASSERT_SERIALIZABLES_EQ(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("x").Type(
+ NTi::Optional(
+ NTi::Struct({{"x", NTi::Optional(NTi::Int64())}}) // the nested message's own field
+ )
+ )
+ )
+ );
+}
diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto
new file mode 100644
index 00000000000..da1e48f691b
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto
@@ -0,0 +1,402 @@
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NUnitTesting;
+
+message TIntegral // One optional field per scalar protobuf numeric/bool type, plus an enum field.
+{
+ optional double DoubleField = 1;
+ optional float FloatField = 2;
+ optional int32 Int32Field = 3;
+ optional int64 Int64Field = 4;
+ optional uint32 Uint32Field = 5;
+ optional uint64 Uint64Field = 6;
+ optional sint32 Sint32Field = 7;
+ optional sint64 Sint64Field = 8;
+ optional fixed32 Fixed32Field = 9;
+ optional fixed64 Fixed64Field = 10;
+ optional sfixed32 Sfixed32Field = 11;
+ optional sfixed64 Sfixed64Field = 12;
+ optional bool BoolField = 13;
+ enum TriBool // includes a negative value
+ {
+ TRI_FALSE = 0;
+ TRI_TRUE = 1;
+ TRI_UNDEF = -1;
+ }
+ optional TriBool EnumField = 14;
+}
+
+message TRepeated // A single repeated scalar field with no NYT serialization options.
+{
+ repeated int32 Int32Field = 1;
+}
+
+message TRepeatedYtMode // Same field as TRepeated, but with SERIALIZATION_YT as the message-wide default.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated int32 Int32Field = 1;
+}
+
+message TWithTypeOptions // Exercises the per-field type flags: ENUM_INT, ENUM_STRING, ANY and OTHER_COLUMNS.
+{
+ enum Color // includes a negative value
+ {
+ WHITE = 0;
+ BLUE = 1;
+ RED = -1;
+ }
+
+ message TEmbedded // the same type flags inside a nested, YT-serialized message
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT];
+ optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING];
+ optional bytes AnyField = 3 [(NYT.flags) = ANY];
+ }
+
+ optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT];
+ optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING];
+ optional bytes AnyField = 3 [(NYT.flags) = ANY];
+ optional bytes OtherColumnsField = 4 [(NYT.flags) = OTHER_COLUMNS];
+ optional TEmbedded EmbeddedField = 5 [(NYT.flags) = SERIALIZATION_YT];
+ repeated Color RepeatedEnumIntField = 6 [(NYT.flags) = SERIALIZATION_YT, (NYT.flags) = ENUM_INT]; // two flags combined on one field
+}
+
+message TWithTypeOptions_TypeMismatch_EnumInt // ENUM_INT applied to a non-enum field; presumably a negative test case (its test is outside this chunk).
+{
+ optional int64 EnumField = 1 [(NYT.flags) = ENUM_INT];
+}
+
+message TWithTypeOptions_TypeMismatch_EnumString // ENUM_STRING applied to a non-enum field; presumably a negative test case (its test is outside this chunk).
+{
+ optional string EnumField = 1 [(NYT.flags) = ENUM_STRING];
+}
+
+message TWithTypeOptions_TypeMismatch_Any // ANY applied to a string field, where TWithTypeOptions uses bytes; presumably a negative test case (its test is outside this chunk).
+{
+ optional string AnyField = 1 [(NYT.flags) = ANY];
+}
+
+message TWithTypeOptions_TypeMismatch_OtherColumns // OTHER_COLUMNS applied to a string field, where TWithTypeOptions uses bytes; presumably a negative test case (its test is outside this chunk).
+{
+ optional string OtherColumnsField = 1 [(NYT.flags) = OTHER_COLUMNS];
+}
+
+message TOneOf // A oneof group alongside a regular optional field, with no NYT options.
+{
+ oneof Chooser
+ {
+ double DoubleField = 1;
+ int32 Int32Field = 2;
+ }
+ optional bool BoolField = 3;
+}
+
+message TWithRequired // One required and one optional field of the same type.
+{
+ required string RequiredField = 1;
+ optional string NotRequiredField = 2;
+};
+
+message TAggregated // Mixes string/bytes fields with nested message fields, including a field of its own type.
+{
+ optional string StringField = 1;
+ optional bytes BytesField = 2;
+ optional TIntegral NestedField = 3;
+ optional TRepeated NestedRepeatedField = 4;
+ optional TOneOf NestedOneOfField = 5;
+ optional TAggregated NestedRecursiveField = 6; // self-reference
+}
+
+message TAliased // Uses (NYT.key_column_name) to give fields lower-case names that differ from the proto field names.
+{
+ optional int32 Key = 1 [(NYT.key_column_name) = "key"];
+ optional double Subkey = 2 [(NYT.key_column_name) = "subkey"];
+ optional TAggregated Data = 3;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+message TUrlRow // Row type embedded by the TRow*SerializationOption* messages; each column_name equals its field name.
+{
+ optional string Host = 1 [(NYT.column_name) = "Host"];
+ optional string Path = 2 [(NYT.column_name) = "Path"];
+ optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"];
+}
+
+message TRowFieldSerializationOption // SERIALIZATION_YT set on one field only; the other field carries no flag.
+{
+ optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_YT];
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+message TRowMessageSerializationOption // SERIALIZATION_YT set as the message-wide default; neither field has a flag of its own.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+message TRowMixedSerializationOptions // Message-wide SERIALIZATION_YT default with a per-field SERIALIZATION_PROTOBUF override on UrlRow_2.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+}
+
+message TRowSerializedRepeatedFields // Repeated scalar and repeated message fields under a SERIALIZATION_YT default.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated int64 Ints = 1;
+ repeated TUrlRow UrlRows = 2;
+}
+
+message TUrlRowWithColumnNames // Variant of TUrlRow covering the name options: both set, key_column_name only, and neither.
+{
+ optional string Host = 1 [(NYT.column_name) = "Host_ColumnName", (NYT.key_column_name) = "Host_KeyColumnName"]; // both options set
+ optional string Path = 2 [(NYT.key_column_name) = "Path_KeyColumnName"]; // key_column_name only
+ optional sint32 HttpCode = 3; // no name option
+}
+
+message TRowMixedSerializationOptions_ColumnNames // Same layout as TRowMixedSerializationOptions, but embedding TUrlRowWithColumnNames.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRowWithColumnNames UrlRow_1 = 1;
+ optional TUrlRowWithColumnNames UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+}
+
+message TNoOptionInheritance // 3 x 3 grid: embedded message default (YT / PROTOBUF / none) against per-field flag (YT / PROTOBUF / none).
+{
+ message TDeepestEmbedded
+ {
+ optional int64 x = 1;
+ }
+
+ message TEmbedded // no default_field_flags
+ {
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ message TEmbeddedYt // default SERIALIZATION_YT
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ message TEmbeddedProtobuf // default SERIALIZATION_PROTOBUF
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ optional TEmbeddedYt EmbeddedYt_YtOption = 1 [(NYT.flags) = SERIALIZATION_YT]; // field names follow <embedded type>_<field flag>
+ optional TEmbeddedYt EmbeddedYt_ProtobufOption = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbeddedYt EmbeddedYt_NoOption = 3;
+ optional TEmbeddedProtobuf EmbeddedProtobuf_YtOption = 4 [(NYT.flags) = SERIALIZATION_YT];
+ optional TEmbeddedProtobuf EmbeddedProtobuf_ProtobufOption = 5 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbeddedProtobuf EmbeddedProtobuf_NoOption = 6;
+ optional TEmbedded Embedded_YtOption = 7 [(NYT.flags) = SERIALIZATION_YT];
+ optional TEmbedded Embedded_ProtobufOption = 8 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbedded Embedded_NoOption = 9;
+}
+
+message TOptionalList // A repeated field carrying both OPTIONAL_LIST and SERIALIZATION_YT.
+{
+ repeated int64 OptionalListInt64 = 1 [(NYT.flags) = OPTIONAL_LIST, (NYT.flags) = SERIALIZATION_YT];
+}
+
+message TPacked // A YT-serialized repeated field that also uses protobuf's packed encoding.
+{
+ repeated int64 PackedListInt64 = 1 [(NYT.flags) = SERIALIZATION_YT, packed=true];
+}
+
+message TCyclic // Message graph with reference cycles (TA -> TB -> TD -> TA and TA -> TC -> TD -> TA), all under SERIALIZATION_YT.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TA
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated TB b = 1;
+ optional TC c = 2;
+ }
+
+ message TB
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TD d = 1;
+ }
+
+ message TC
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TD d = 1;
+ }
+
+ message TD
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TA a = 1; // closes the cycle back to TA
+ }
+
+ message TE // refers into the cycle, but only through a SERIALIZATION_PROTOBUF field
+ {
+ optional TD d = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ }
+
+ optional TA a = 1;
+}
+
+message TFieldSortOrder // Three identical nested messages that differ only in their message_flags sort option.
+{
+ message TEmbeddedDefault { // no message_flags; declaration order (x, y, z) differs from field-number order (z, x, y)
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedAsInProtoFile {
+ option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedByFieldNumber {
+ option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TEmbeddedDefault EmbeddedDefault = 1;
+ optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2;
+ optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3;
+}
+
+message TWithMap // The same map<int64, TEmbedded> declared once per MAP_AS_* flag, plus once with no flag.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TEmbedded { // map value type
+ optional int64 x = 1;
+ optional string y = 2;
+ }
+
+ map<int64, TEmbedded> MapDefault = 1;
+ map<int64, TEmbedded> MapListOfStructsLegacy = 2 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY];
+ map<int64, TEmbedded> MapListOfStructs = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS];
+ map<int64, TEmbedded> MapOptionalDict = 4 [(NYT.flags) = MAP_AS_OPTIONAL_DICT];
+ map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT];
+}
+
+message TWithOneof // Exercises oneof handling: VARIANT vs SEPARATE_FIELDS, message-level defaults, and a top-level oneof.
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TEmbedded // oneof member type that itself contains a oneof
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ oneof Oneof {
+ int64 x = 1;
+ string y = 2;
+ }
+ }
+
+ message TDefaultSeparateFields // message default is SEPARATE_FIELDS; Oneof2 overrides it with VARIANT
+ {
+ option (NYT.default_oneof_flags) = SEPARATE_FIELDS;
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ option (NYT.variant_field_name) = "variant_field_name"; // custom name for the variant field
+ option (NYT.oneof_flags) = VARIANT;
+ string y2 = 4; // members are declared out of field-number order
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1 // no oneof_flags of its own, so the message default applies
+ {
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TNoDefault // no default_oneof_flags; Oneof1 sets SEPARATE_FIELDS explicitly
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional string field = 1;
+
+ oneof Oneof2 // no oneof options at all
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ option (NYT.oneof_flags) = SEPARATE_FIELDS;
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TSerializationProtobuf // no default_field_flags and no oneof options
+ {
+ oneof Oneof
+ {
+ int64 x1 = 2;
+ string y1 = 1;
+ TEmbedded z1 = 3;
+ }
+ }
+
+ optional TDefaultSeparateFields DefaultSeparateFields = 1;
+ optional TNoDefault NoDefault = 2;
+ optional TSerializationProtobuf SerializationProtobuf = 3;
+
+ oneof TopLevelOneof // oneof declared directly in the row message
+ {
+ int64 MemberOfTopLevelOneof = 4;
+ }
+}
+
+message TEmbeddedStruct { // plain two-field message used as the type of TEmbedded2Message.embedded2_struct
+ optional float float1 = 1;
+ optional string string1 = 2;
+}
+
+message TEmbedded2Message { // innermost message of the EMBEDDED chain (TEmbeddingMessage.t1 -> TEmbedded1Message.t2)
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional uint64 embedded2_num = 10;
+ optional TEmbeddedStruct embedded2_struct = 17;
+ repeated string embedded2_repeated = 42;
+}
+
+message TEmbedded1Message { // middle message of the EMBEDDED chain; embeds TEmbedded2Message and adds its own fields and a oneof
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ required TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED];
+ oneof variant {
+ string str_variant = 101;
+ uint64 uint_variant = 102;
+ }
+ optional uint64 embedded_num = 10; // intentional field-number collision with TEmbedded2Message.embedded2_num (also 10)
+ optional string embedded_extra_field = 11;
+}
+
+message TEmbeddingMessage { // row message: embeds TEmbedded1Message via EMBEDDED and also has an OTHER_COLUMNS field
+ optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
+ required TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED];
+ optional uint64 num = 12;
+ optional string extra_field = 13;
+}
diff --git a/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp b/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp
new file mode 100644
index 00000000000..0acec154d4e
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp
@@ -0,0 +1,46 @@
+#include <yt/cpp/mapreduce/interface/serialize.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <util/generic/serialized_enum.h>
+
+using namespace NYT;
+
+TEST(TSerializationTest, TableSchema) // TTableSchema::ToNode() must yield a list node with one entry per column, in insertion order.
+{
+ auto schema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING))
+ .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64))
+ .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64, true)); // second argument is expected to surface as "required" = true below
+
+ auto schemaNode = schema.ToNode();
+ EXPECT_TRUE(schemaNode.IsList());
+ EXPECT_EQ(schemaNode.Size(), 3u);
+
+
+ EXPECT_EQ(schemaNode[0]["name"], "a");
+ EXPECT_EQ(schemaNode[0]["type"], "string");
+ EXPECT_EQ(schemaNode[0]["required"], false); // "required" is expected to be false when not requested
+ EXPECT_EQ(schemaNode[0]["sort_order"], "ascending");
+
+ EXPECT_EQ(schemaNode[1]["name"], "b");
+ EXPECT_EQ(schemaNode[1]["type"], "uint64");
+ EXPECT_EQ(schemaNode[1]["required"], false);
+
+ EXPECT_EQ(schemaNode[2]["name"], "c");
+ EXPECT_EQ(schemaNode[2]["type"], "int64");
+ EXPECT_EQ(schemaNode[2]["required"], true);
+}
+
+TEST(TSerializationTest, ValueTypeSerialization) // Round-trips every EValueType through ToString() and Deserialize().
+{
+ for (const auto value : GetEnumAllValues<EValueType>()) {
+ TNode serialized = NYT::NDetail::ToString(value); // string form wrapped in a TNode
+ EValueType deserialized;
+ Deserialize(deserialized, serialized);
+ EXPECT_EQ(value, deserialized); // must come back as the same enumerator
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/ut/ya.make b/yt/cpp/mapreduce/interface/ut/ya.make
index 0219e6430ca..9e92931b5de 100644
--- a/yt/cpp/mapreduce/interface/ut/ya.make
+++ b/yt/cpp/mapreduce/interface/ut/ya.make
@@ -1,4 +1,4 @@
-UNITTEST_FOR(yt/cpp/mapreduce/interface)
+GTEST()
SRCS(
common_ut.cpp
@@ -18,8 +18,9 @@ SRCS(
PEERDIR(
contrib/libs/protobuf
- library/cpp/testing/unittest
+ library/cpp/testing/gtest
yt/yt_proto/yt/formats
+ yt/cpp/mapreduce/interface
)
END()