diff options
| author | nadya73 <[email protected]> | 2024-07-02 23:10:50 +0300 | 
|---|---|---|
| committer | nadya73 <[email protected]> | 2024-07-02 23:21:03 +0300 | 
| commit | 5ea9afc5ee7edc24efa5f45b3a15e184872b0854 (patch) | |
| tree | 4ccc339d97575cba8b3ed47b6f0615326bdb5324 /yt/cpp/mapreduce/interface/ut | |
| parent | 96b239778766d32d5158aca805e08199b3c0a743 (diff) | |
[yt/cpp/mapreduce] YT-21595: Use gtest instead of ytest in all mapreduce tests
85671f0cf4f45b4f015fa2cc0d195b81c16c6e8a
Diffstat (limited to 'yt/cpp/mapreduce/interface/ut')
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/common_ut.cpp | 353 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/common_ut.h | 1 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/config_ut.cpp | 17 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/error_ut.cpp | 81 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/format_ut.cpp | 232 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp | 100 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp | 254 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/operation_ut.cpp | 272 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/proto3_ut.proto | 17 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp | 270 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto | 142 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp | 444 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto | 402 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/serialize_ut.cpp | 46 | ||||
| -rw-r--r-- | yt/cpp/mapreduce/interface/ut/ya.make | 5 | 
15 files changed, 2634 insertions, 2 deletions
diff --git a/yt/cpp/mapreduce/interface/ut/common_ut.cpp b/yt/cpp/mapreduce/interface/ut/common_ut.cpp new file mode 100644 index 00000000000..85122a97ec6 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/common_ut.cpp @@ -0,0 +1,353 @@ +#include "common_ut.h" + +#include <yt/cpp/mapreduce/interface/common.h> +#include <yt/cpp/mapreduce/interface/fluent.h> + +#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> + +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yson/node/node_io.h> +#include <library/cpp/yson/node/node_builder.h> + +#include <util/generic/xrange.h> + +using namespace NYT; + +template <class T> +TString SaveToString(const T& obj) +{ +    TString s; +    TStringOutput out(s); +    ::Save(&out, obj); +    return s; +} + +template <class T> +T LoadFromString(TStringBuf s) +{ +    TMemoryInput in(s); +    T obj; +    ::Load(&in, obj); +    return obj; +} + +template <class T> +T SaveLoad(const T& obj) +{ +    return LoadFromString<T>(SaveToString(obj)); +} + +TEST(TCommonTest, SortColumnsLegacy) +{ +    TSortColumns keys1("a", "b"); +    EXPECT_TRUE((keys1.Parts_ == TSortColumns{"a", "b"})); + +    keys1.Add("c", "d"); +    EXPECT_TRUE((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); + +    auto keys2 = TSortColumns(keys1).Add("e", "f"); +    EXPECT_TRUE((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); +    EXPECT_TRUE((keys2.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f"})); + +    auto keys3 = TSortColumns(keys1).Add("e").Add("f").Add("g"); +    EXPECT_TRUE((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); +    EXPECT_TRUE((keys3.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f", "g"})); +} + +TEST(TCommonTest, SortColumn) +{ +    auto ascending = TSortColumn("a"); +    EXPECT_EQ(ascending.Name(), "a"); +    EXPECT_EQ(ascending.SortOrder(), ESortOrder::SO_ASCENDING); +    EXPECT_EQ(ascending, TSortColumn("a", ESortOrder::SO_ASCENDING)); +    EXPECT_NE(ascending, TSortColumn("a", ESortOrder::SO_DESCENDING)); + +    EXPECT_NO_THROW(ascending.EnsureAscending()); +    EXPECT_EQ(static_cast<TString>(ascending), "a"); +    EXPECT_EQ(ascending, "a"); + +    auto another = ascending; +    EXPECT_NO_THROW(another = "another"); +    EXPECT_EQ(another.Name(), "another"); +    EXPECT_EQ(another.SortOrder(), ESortOrder::SO_ASCENDING); +    EXPECT_EQ(another, TSortColumn("another", ESortOrder::SO_ASCENDING)); +    EXPECT_NE(another, TSortColumn("another", ESortOrder::SO_DESCENDING)); + +    auto ascendingNode = BuildYsonNodeFluently().Value(ascending); +    EXPECT_EQ(ascendingNode, TNode("a")); + +    EXPECT_EQ(SaveLoad(ascending), ascending); +    EXPECT_NE(SaveToString(ascending), SaveToString(TString("a"))); + +    auto descending = TSortColumn("a", ESortOrder::SO_DESCENDING); +    EXPECT_EQ(descending.Name(), "a"); +    EXPECT_EQ(descending.SortOrder(), ESortOrder::SO_DESCENDING); +    EXPECT_EQ(descending, TSortColumn("a", ESortOrder::SO_DESCENDING)); +    EXPECT_NE(descending, TSortColumn("a", ESortOrder::SO_ASCENDING)); + +    EXPECT_THROW(descending.EnsureAscending(), yexception); +    EXPECT_THROW(Y_UNUSED(static_cast<TString>(descending)), yexception); +    EXPECT_THROW(Y_UNUSED(descending == "a"), yexception); +    EXPECT_THROW(descending = "a", yexception); + +    auto descendingNode = BuildYsonNodeFluently().Value(descending); +    EXPECT_EQ(descendingNode, TNode()("name", "a")("sort_order", "descending")); + +    EXPECT_EQ(SaveLoad(descending), descending); +    EXPECT_NE(SaveToString(descending), SaveToString("a")); + +    EXPECT_EQ(ToString(TSortColumn("blah")), "blah"); +    EXPECT_EQ(ToString(TSortColumn("blah", ESortOrder::SO_DESCENDING)), "{\"name\"=\"blah\";\"sort_order\"=\"descending\"}"); +} + +TEST(TCommonTest, SortColumns) +{ +    TSortColumns ascending("a", "b"); +    EXPECT_TRUE(ascending.Parts_ == (TSortColumns{"a", "b"})); +    EXPECT_NO_THROW(ascending.EnsureAscending()); +    EXPECT_EQ(static_cast<TColumnNames>(ascending).Parts_, (TVector<TString>{"a", "b"})); +    EXPECT_EQ(ascending.GetNames(), (TVector<TString>{"a", "b"})); + +    auto mixed = ascending; +    mixed.Add(TSortColumn("c", ESortOrder::SO_DESCENDING), "d"); +    EXPECT_TRUE((mixed.Parts_ != TVector<TSortColumn>{"a", "b", "c", "d"})); +    EXPECT_TRUE((mixed.Parts_ == TVector<TSortColumn>{"a", "b", TSortColumn("c", ESortOrder::SO_DESCENDING), "d"})); +    EXPECT_EQ(mixed.GetNames(), (TVector<TString>{"a", "b", "c", "d"})); +    EXPECT_THROW(mixed.EnsureAscending(), yexception); +    EXPECT_THROW(Y_UNUSED(static_cast<TColumnNames>(mixed)), yexception); +} + +TEST(TCommonTest, KeyBound) +{ +    auto keyBound = TKeyBound(ERelation::Greater, TKey(7, "a", TNode()("x", "y"))); +    EXPECT_EQ(keyBound.Relation(), ERelation::Greater); +    EXPECT_EQ(keyBound.Key(), TKey(7, "a", TNode()("x", "y"))); + +    auto keyBound1 = TKeyBound().Relation(ERelation::Greater).Key(TKey(7, "a", TNode()("x", "y"))); +    auto expectedNode = TNode() +        .Add(">") +        .Add(TNode().Add(7).Add("a").Add(TNode()("x", "y"))); + +    EXPECT_EQ(expectedNode, BuildYsonNodeFluently().Value(keyBound)); +    EXPECT_EQ(expectedNode, BuildYsonNodeFluently().Value(keyBound1)); + +    keyBound.Relation(ERelation::LessOrEqual); +    keyBound.Key(TKey("A", 7)); +    EXPECT_EQ(keyBound.Relation(), ERelation::LessOrEqual); +    EXPECT_EQ(keyBound.Key(), TKey("A", 7)); + +    EXPECT_EQ( +        BuildYsonNodeFluently().Value(keyBound), +        TNode() +            .Add("<=") +            .Add(TNode().Add("A").Add(7))); +} + +TEST(TCommonTest, TTableSchema) +{ +    TTableSchema schema; +    schema +        .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) +        .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) +        .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64)); +    auto checkSortBy = [](TTableSchema schema, const TVector<TString>& columns) { +        auto initialSchema = schema; +        schema.SortBy(columns); +        for (auto i: xrange(columns.size())) { +            EXPECT_EQ(schema.Columns()[i].Name(), columns[i]); +            EXPECT_EQ(schema.Columns()[i].SortOrder(), ESortOrder::SO_ASCENDING); +        } +        for (auto i: xrange(columns.size(), (size_t)initialSchema.Columns().size())) { +            EXPECT_EQ(schema.Columns()[i].SortOrder(), Nothing()); +        } +        EXPECT_EQ(initialSchema.Columns().size(), schema.Columns().size()); +        return schema; +    }; +    auto newSchema = checkSortBy(schema, {"b"}); +    EXPECT_EQ(newSchema.Columns()[1].Name(), TString("a")); +    EXPECT_EQ(newSchema.Columns()[2].Name(), TString("c")); +    checkSortBy(schema, {"b", "c"}); +    checkSortBy(schema, {"c", "a"}); +    EXPECT_THROW(checkSortBy(schema, {"b", "b"}), yexception); +    EXPECT_THROW(checkSortBy(schema, {"a", "junk"}), yexception); +} + +TEST(TCommonTest, TTableSchema_Decimal) +{ +    NYT::TTableSchema tableSchema; + +    tableSchema.AddColumn("a", NTi::Decimal(35, 18)); +    tableSchema.AddColumn("b", NTi::Optional(NTi::Decimal(35, 18))); +    tableSchema.AddColumn("c", NTi::List(NTi::Decimal(35, 18))); + +    auto tableSchemaNode = tableSchema.ToNode(); +    const auto& tableSchemaNodeList = tableSchemaNode.AsList(); + +    // There was a bug in the serialization of decimal type: https://github.com/ytsaurus/ytsaurus/issues/173 +    { +        const auto& currentType = tableSchemaNodeList[0]; +        EXPECT_EQ(currentType.ChildAsString("type"), "string"); +        EXPECT_TRUE(currentType.ChildAsBool("required")); +        EXPECT_TRUE(currentType.HasKey("type_v3")); +        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "decimal"); +    } +    { +        const auto& currentType = tableSchemaNodeList[1]; +        EXPECT_EQ(currentType.ChildAsString("type"), "string"); +        EXPECT_TRUE(!currentType.ChildAsBool("required")); +        EXPECT_TRUE(currentType.HasKey("type_v3")); +        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "optional"); +        EXPECT_EQ(currentType.At("type_v3").At("item").ChildAsString("type_name"), "decimal"); +    } +    { +        const auto& currentType = tableSchemaNodeList[2]; +        EXPECT_EQ(currentType.ChildAsString("type"), "any"); +        EXPECT_TRUE(currentType.ChildAsBool("required")); +        EXPECT_TRUE(currentType.HasKey("type_v3")); +        EXPECT_EQ(currentType.At("type_v3").ChildAsString("type_name"), "list"); +        EXPECT_EQ(currentType.At("type_v3").At("item").ChildAsString("type_name"), "decimal"); +    } + +    EXPECT_EQ(tableSchema, TTableSchema::FromNode(tableSchemaNode)); +} + +TEST(TCommonTest, TColumnSchema_TypeV3) +{ +    { +        auto column = TColumnSchema().Type(NTi::Interval()); +        EXPECT_EQ(column.Required(), true); +        EXPECT_EQ(column.Type(), VT_INTERVAL); +    } +    { +        auto column = TColumnSchema().Type(NTi::Optional(NTi::Date())); +        EXPECT_EQ(column.Required(), false); +        EXPECT_EQ(column.Type(), VT_DATE); +    } +    { +        auto column = TColumnSchema().Type(NTi::Interval64()); +        EXPECT_EQ(column.Required(), true); +        EXPECT_EQ(column.Type(), VT_INTERVAL64); +    } +    { +        auto column = TColumnSchema().Type(NTi::Optional(NTi::Date32())); +        EXPECT_EQ(column.Required(), false); +        EXPECT_EQ(column.Type(), VT_DATE32); +    } +    { +        auto column = TColumnSchema().Type(NTi::Null()); +        EXPECT_EQ(column.Required(), false); +        EXPECT_EQ(column.Type(), VT_NULL); +    } +    { +        auto column = TColumnSchema().Type(NTi::Optional(NTi::Null())); +        EXPECT_EQ(column.Required(), false); +        EXPECT_EQ(column.Type(), VT_ANY); +    } +    { +        auto column = TColumnSchema().Type(NTi::Decimal(35, 18)); +        EXPECT_EQ(column.Required(), true); +        EXPECT_EQ(column.Type(), VT_STRING); +    } +} + +TEST(TCommonTest, ToTypeV3) +{ +    EXPECT_EQ(*ToTypeV3(VT_INT32, true), *NTi::Int32()); +    EXPECT_EQ(*ToTypeV3(VT_UTF8, false), *NTi::Optional(NTi::Utf8())); +} + +TEST(TCommonTest, DeserializeColumn) +{ +    auto deserialize = [] (TStringBuf yson) { +        auto node = NodeFromYsonString(yson); +        TColumnSchema column; +        Deserialize(column, node); +        return column; +    }; + +    auto column = deserialize("{name=foo; type=int64; required=%false}"); +    EXPECT_EQ(column.Name(), "foo"); +    EXPECT_EQ(*column.TypeV3(), *NTi::Optional(NTi::Int64())); + +    column = deserialize("{name=bar; type=utf8; required=%true; type_v3=utf8}"); +    EXPECT_EQ(column.Name(), "bar"); +    EXPECT_EQ(*column.TypeV3(), *NTi::Utf8()); +} + +TEST(TCommonTest, ColumnSchemaEquality) +{ +    auto base = TColumnSchema() +        .Name("col") +        .TypeV3(NTi::Optional(NTi::List(NTi::String()))) +        .SortOrder(ESortOrder::SO_ASCENDING) +        .Lock("lock") +        .Expression("x + 12") +        .Aggregate("sum") +        .Group("group"); + +    auto other = base; +    ASSERT_SERIALIZABLES_EQ(other, base); +    other.Name("other"); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.TypeV3(NTi::List(NTi::String())); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.ResetSortOrder(); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.Lock("lock1"); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.Expression("x + 13"); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.ResetAggregate(); +    ASSERT_SERIALIZABLES_NE(other, base); + +    other = base; +    other.Group("group1"); +    ASSERT_SERIALIZABLES_NE(other, base); +} + +TEST(TCommonTest, TableSchemaEquality) +{ +    auto col1 = TColumnSchema() +        .Name("col1") +        .TypeV3(NTi::Optional(NTi::List(NTi::String()))) +        .SortOrder(ESortOrder::SO_ASCENDING); + +    auto col2 = TColumnSchema() +        .Name("col2") +        .TypeV3(NTi::Uint32()); + +    auto schema = TTableSchema() +        .AddColumn(col1) +        .AddColumn(col2) +        .Strict(true) +        .UniqueKeys(true); + +    auto other = schema; +    ASSERT_SERIALIZABLES_EQ(other, schema); + +    other.Strict(false); +    ASSERT_SERIALIZABLES_NE(other, schema); + +    other = schema; +    other.MutableColumns()[0].TypeV3(NTi::List(NTi::String())); +    ASSERT_SERIALIZABLES_NE(other, schema); + +    other = schema; +    other.MutableColumns().push_back(col1); +    ASSERT_SERIALIZABLES_NE(other, schema); + +    other = schema; +    other.UniqueKeys(false); +    ASSERT_SERIALIZABLES_NE(other, schema); +} diff --git a/yt/cpp/mapreduce/interface/ut/common_ut.h b/yt/cpp/mapreduce/interface/ut/common_ut.h new file mode 100644 index 00000000000..6f70f09beec --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/common_ut.h @@ -0,0 +1 @@ +#pragma once diff --git a/yt/cpp/mapreduce/interface/ut/config_ut.cpp b/yt/cpp/mapreduce/interface/ut/config_ut.cpp new file mode 100644 index 00000000000..780a57f3f25 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/config_ut.cpp @@ -0,0 +1,17 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <yt/cpp/mapreduce/interface/config.h> + +using namespace NYT; + +TEST(TConfigTest, Reset) { +    // Very limited test, checks only one config field. + +    auto origConfig = *TConfig::Get(); +    TConfig::Get()->Reset(); +    EXPECT_EQ(origConfig.Hosts, TConfig::Get()->Hosts); + +    TConfig::Get()->Hosts = "hosts/fb867"; +    TConfig::Get()->Reset(); +    EXPECT_EQ(origConfig.Hosts, TConfig::Get()->Hosts); +} diff --git a/yt/cpp/mapreduce/interface/ut/error_ut.cpp b/yt/cpp/mapreduce/interface/ut/error_ut.cpp new file mode 100644 index 00000000000..4911f29d97a --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/error_ut.cpp @@ -0,0 +1,81 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/json/json_reader.h> + +#include <yt/cpp/mapreduce/interface/errors.h> + +#include <yt/cpp/mapreduce/common/helpers.h> + +#include <util/generic/set.h> + +using namespace NYT; + +template<> +void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node) +{ +    s << "TNode:" << NodeToYsonString(node); +} + +TEST(TErrorTest, ParseJson) +{ +    // Scary real world error! Бу! +    const char* jsonText = +        R"""({)""" +            R"""("code":500,)""" +            R"""("message":"Error resolving path //home/user/link",)""" +            R"""("attributes":{)""" +                R"""("fid":18446484571700269066,)""" +                R"""("method":"Create",)""" +                R"""("tid":17558639495721339338,)""" +                R"""("datetime":"2017-04-07T13:38:56.474819Z",)""" +                R"""("pid":414529,)""" +                R"""("host":"build01-01g.yt.yandex.net"},)""" +            R"""("inner_errors":[{)""" +                R"""("code":1,)""" +                R"""("message":"Node //tt cannot have children",)""" +                R"""("attributes":{)""" +                    R"""("fid":18446484571700269066,)""" +                    R"""("tid":17558639495721339338,)""" +                    R"""("datetime":"2017-04-07T13:38:56.474725Z",)""" +                    R"""("pid":414529,)""" +                    R"""("host":"build01-01g.yt.yandex.net"},)""" +                R"""("inner_errors":[]}]})"""; + +    NJson::TJsonValue jsonValue; +    ReadJsonFastTree(jsonText, &jsonValue, /*throwOnError=*/ true); + +    TYtError error(jsonValue); +    EXPECT_EQ(error.GetCode(), 500); +    EXPECT_EQ(error.GetMessage(), R"""(Error resolving path //home/user/link)"""); +    EXPECT_EQ(error.InnerErrors().size(), 1u); +    EXPECT_EQ(error.InnerErrors()[0].GetCode(), 1); + +    EXPECT_EQ(error.HasAttributes(), true); +    EXPECT_EQ(error.GetAttributes().at("method"), TNode("Create")); + +    EXPECT_EQ(error.GetAllErrorCodes(), TSet<int>({500, 1})); +} + +TEST(TErrorTest, GetYsonText) { +    const char* jsonText = +        R"""({)""" +            R"""("code":500,)""" +            R"""("message":"outer error",)""" +            R"""("attributes":{)""" +                R"""("method":"Create",)""" +                R"""("pid":414529},)""" +            R"""("inner_errors":[{)""" +                R"""("code":1,)""" +                R"""("message":"inner error",)""" +                R"""("attributes":{},)""" +                R"""("inner_errors":[])""" +            R"""(}]})"""; +    TYtError error; +    error.ParseFrom(jsonText); +    TString ysonText = error.GetYsonText(); +    TYtError error2(NodeFromYsonString(ysonText)); +    EXPECT_EQ( +        ysonText, +        R"""({"code"=500;"message"="outer error";"attributes"={"method"="Create";"pid"=414529};"inner_errors"=[{"code"=1;"message"="inner error"}]})"""); +    EXPECT_EQ(error2.GetYsonText(), ysonText); +} diff --git a/yt/cpp/mapreduce/interface/ut/format_ut.cpp b/yt/cpp/mapreduce/interface/ut/format_ut.cpp new file mode 100644 index 00000000000..83b860ab94d --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/format_ut.cpp @@ -0,0 +1,232 @@ +#include "common_ut.h" + +#include <yt/cpp/mapreduce/interface/common.h> +#include <yt/cpp/mapreduce/interface/errors.h> +#include <yt/cpp/mapreduce/interface/format.h> + +#include <yt/cpp/mapreduce/interface/ut/proto3_ut.pb.h> +#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h> + +#include <library/cpp/testing/gtest/gtest.h> + +using namespace NYT; + +static TNode GetColumns(const TFormat& format, int tableIndex = 0) +{ +    return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; +} + +TEST(TProtobufFormatTest, TIntegral) +{ +    const auto format = TFormat::Protobuf<NUnitTesting::TIntegral>(); +    auto columns = GetColumns(format); + +    struct TColumn +    { +        TString Name; +        TString ProtoType; +        int FieldNumber; +    }; + +    auto expected = TVector<TColumn>{ +        {"DoubleField", "double", 1}, +        {"FloatField", "float", 2}, +        {"Int32Field", "int32", 3}, +        {"Int64Field", "int64", 4}, +        {"Uint32Field", "uint32", 5}, +        {"Uint64Field", "uint64", 6}, +        {"Sint32Field", "sint32", 7}, +        {"Sint64Field", "sint64", 8}, +        {"Fixed32Field", "fixed32", 9}, +        {"Fixed64Field", "fixed64", 10}, +        {"Sfixed32Field", "sfixed32", 11}, +        {"Sfixed64Field", "sfixed64", 12}, +        {"BoolField", "bool", 13}, +        {"EnumField", "enum_string", 14}, +    }; + +    EXPECT_EQ(columns.Size(), expected.size()); +    for (int i = 0; i < static_cast<int>(columns.Size()); ++i) { +        EXPECT_EQ(columns[i]["name"], expected[i].Name); +        EXPECT_EQ(columns[i]["proto_type"], expected[i].ProtoType); +        EXPECT_EQ(columns[i]["field_number"], expected[i].FieldNumber); +    } +} + +TEST(TProtobufFormatTest, TRowFieldSerializationOption) +{ +    const auto format = TFormat::Protobuf<NUnitTesting::TRowFieldSerializationOption>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns[0]["name"], "UrlRow_1"); +    EXPECT_EQ(columns[0]["proto_type"], "structured_message"); +    EXPECT_EQ(columns[0]["field_number"], 1); +    const auto& fields = columns[0]["fields"]; +    EXPECT_EQ(fields[0]["name"], "Host"); +    EXPECT_EQ(fields[0]["proto_type"], "string"); +    EXPECT_EQ(fields[0]["field_number"], 1); + +    EXPECT_EQ(fields[1]["name"], "Path"); +    EXPECT_EQ(fields[1]["proto_type"], "string"); +    EXPECT_EQ(fields[1]["field_number"], 2); + +    EXPECT_EQ(fields[2]["name"], "HttpCode"); +    EXPECT_EQ(fields[2]["proto_type"], "sint32"); +    EXPECT_EQ(fields[2]["field_number"], 3); + +    EXPECT_EQ(columns[1]["name"], "UrlRow_2"); +    EXPECT_EQ(columns[1]["proto_type"], "message"); +    EXPECT_EQ(columns[1]["field_number"], 2); +} + + +TEST(TProtobufFormatTest, TPacked) +{ +    const auto format = TFormat::Protobuf<NUnitTesting::TPacked>(); +    auto column = GetColumns(format)[0]; + +    EXPECT_EQ(column["name"], "PackedListInt64"); +    EXPECT_EQ(column["proto_type"], "int64"); +    EXPECT_EQ(column["field_number"], 1); +    EXPECT_EQ(column["packed"], true); +    EXPECT_EQ(column["repeated"], true); +} + +TEST(TProtobufFormatTest, TCyclic) +{ +    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic>(), TApiUsageError); +    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TA>(), TApiUsageError); +    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TB>(), TApiUsageError); +    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TC>(), TApiUsageError); +    EXPECT_THROW(TFormat::Protobuf<NUnitTesting::TCyclic::TD>(), TApiUsageError); + +    const auto format = TFormat::Protobuf<NUnitTesting::TCyclic::TE>(); +    auto column = GetColumns(format)[0]; +    EXPECT_EQ(column["name"], "d"); +    EXPECT_EQ(column["proto_type"], "message"); +    EXPECT_EQ(column["field_number"], 1); +} + +TEST(TProtobufFormatTest, Map) +{ +    const auto format = TFormat::Protobuf<NUnitTesting::TWithMap>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns.Size(), 5u); +    { +        const auto& column = columns[0]; +        EXPECT_EQ(column["name"], "MapDefault"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "message"); +    } +    { +        const auto& column = columns[1]; +        EXPECT_EQ(column["name"], "MapListOfStructsLegacy"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "message"); +    } +    { +        const auto& column = columns[2]; +        EXPECT_EQ(column["name"], "MapListOfStructs"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message"); +    } +    { +        const auto& column = columns[3]; +        EXPECT_EQ(column["name"], "MapOptionalDict"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message"); +    } +    { +        const auto& column = columns[4]; +        EXPECT_EQ(column["name"], "MapDict"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message"); +    } +} + + +TEST(TProtobufFormatTest, Oneof) +{ +    const auto format = TFormat::Protobuf<NUnitTesting::TWithOneof>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns.Size(), 4u); +    auto check = [] (const TNode& column, TStringBuf name, TStringBuf oneof2Name) { +        EXPECT_EQ(column["name"], name); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 5u); +        EXPECT_EQ(column["fields"][0]["name"], "field"); + +        const auto& oneof2 = column["fields"][1]; +        EXPECT_EQ(oneof2["name"], oneof2Name); +        EXPECT_EQ(oneof2["proto_type"], "oneof"); +        EXPECT_EQ(oneof2["fields"][0]["name"], "y2"); +        EXPECT_EQ(oneof2["fields"][1]["name"], "z2"); +        EXPECT_EQ(oneof2["fields"][1]["proto_type"], "structured_message"); +        const auto& embeddedOneof = oneof2["fields"][1]["fields"][0]; +        EXPECT_EQ(embeddedOneof["name"], "Oneof"); +        EXPECT_EQ(embeddedOneof["fields"][0]["name"], "x"); +        EXPECT_EQ(embeddedOneof["fields"][1]["name"], "y"); +        EXPECT_EQ(oneof2["fields"][2]["name"], "x2"); + +        EXPECT_EQ(column["fields"][2]["name"], "x1"); +        EXPECT_EQ(column["fields"][3]["name"], "y1"); +        EXPECT_EQ(column["fields"][4]["name"], "z1"); +    }; + +    check(columns[0], "DefaultSeparateFields", "variant_field_name"); +    check(columns[1], "NoDefault", "Oneof2"); + +    { +        const auto& column = columns[2]; +        EXPECT_EQ(column["name"], "SerializationProtobuf"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 3u); +        EXPECT_EQ(column["fields"][0]["name"], "x1"); +        EXPECT_EQ(column["fields"][1]["name"], "y1"); +        EXPECT_EQ(column["fields"][2]["name"], "z1"); +    } +    { +        const auto& column = columns[3]; +        EXPECT_EQ(column["name"], "TopLevelOneof"); +        EXPECT_EQ(column["proto_type"], "oneof"); +        EXPECT_EQ(column["fields"].Size(), 1u); +        EXPECT_EQ(column["fields"][0]["name"], "MemberOfTopLevelOneof"); +    } +} + +TEST(TProto3Test, TWithOptional) +{ +    const auto format = TFormat::Protobuf<NTestingProto3::TWithOptional>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns[0]["name"], "x"); +    EXPECT_EQ(columns[0]["proto_type"], "int64"); +    EXPECT_EQ(columns[0]["field_number"], 1); +} + +TEST(TProto3Test, TWithOptionalMessage) +{ +    const auto format = TFormat::Protobuf<NTestingProto3::TWithOptionalMessage>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns[0]["name"], "x"); +    EXPECT_EQ(columns[0]["proto_type"], "structured_message"); +    EXPECT_EQ(columns[0]["field_number"], 1); + +    EXPECT_EQ(columns[0]["fields"].Size(), 1u); +    EXPECT_EQ(columns[0]["fields"][0]["name"], "x"); +    EXPECT_EQ(columns[0]["fields"][0]["proto_type"], "int64"); +    EXPECT_EQ(columns[0]["fields"][0]["field_number"], 1); +} diff --git a/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp b/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp new file mode 100644 index 00000000000..9972637affe --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/job_counters_ut.cpp @@ -0,0 +1,100 @@ +#include <yt/cpp/mapreduce/interface/job_counters.h> +#include <yt/cpp/mapreduce/interface/operation.h> + +#include <library/cpp/yson/node/node_io.h> + +#include <library/cpp/testing/gtest/gtest.h> + +using namespace NYT; + +TEST(TJobCountersTest, Full) +{ +    const TString input = R"""( +        { +            "completed" = { +                "total" = 6; +                "non-interrupted" = 1; +                "interrupted" = { +                    "whatever_interrupted" = 2; +                    "whatever_else_interrupted" = 3; +                }; +            }; +            "aborted" = { +                "non_scheduled" = { +                    "whatever_non_scheduled" = 4; +                    "whatever_else_non_scheduled" = 5; +                }; +                "scheduled" = { +                    "whatever_scheduled" = 6; +                    "whatever_else_scheduled" = 7; +                }; +                "total" = 22; +            }; +            "lost" = 8; +            "invalidated" = 9; +            "failed" = 10; +            "running" = 11; +            "suspended" = 12; +            "pending" = 13; +            "blocked" = 14; +            "total" = 105; +        })"""; + +    TJobCounters counters(NodeFromYsonString(input)); + +    EXPECT_EQ(counters.GetTotal(), 105u); + +    EXPECT_EQ(counters.GetCompleted().GetTotal(), 6u); +    EXPECT_EQ(counters.GetCompletedNonInterrupted().GetTotal(), 1u); +    EXPECT_EQ(counters.GetCompletedInterrupted().GetTotal(), 5u); +    EXPECT_EQ(counters.GetAborted().GetTotal(), 22u); +    EXPECT_EQ(counters.GetAbortedNonScheduled().GetTotal(), 9u); +    EXPECT_EQ(counters.GetAbortedScheduled().GetTotal(), 13u); +    EXPECT_EQ(counters.GetLost().GetTotal(), 8u); +    EXPECT_EQ(counters.GetInvalidated().GetTotal(), 9u); +    EXPECT_EQ(counters.GetFailed().GetTotal(), 10u); +    EXPECT_EQ(counters.GetRunning().GetTotal(), 11u); +    EXPECT_EQ(counters.GetSuspended().GetTotal(), 12u); +    EXPECT_EQ(counters.GetPending().GetTotal(), 13u); +    EXPECT_EQ(counters.GetBlocked().GetTotal(), 14u); + +    EXPECT_EQ(counters.GetCompletedInterrupted().GetValue("whatever_interrupted"), 2u); +    EXPECT_EQ(counters.GetCompletedInterrupted().GetValue("whatever_else_interrupted"), 3u); +    EXPECT_EQ(counters.GetAbortedNonScheduled().GetValue("whatever_non_scheduled"), 4u); +    EXPECT_EQ(counters.GetAbortedNonScheduled().GetValue("whatever_else_non_scheduled"), 5u); +    EXPECT_EQ(counters.GetAbortedScheduled().GetValue("whatever_scheduled"), 6u); +    EXPECT_EQ(counters.GetAbortedScheduled().GetValue("whatever_else_scheduled"), 7u); + +    EXPECT_THROW(counters.GetCompletedInterrupted().GetValue("Nothingness"), yexception); +} + +TEST(TJobCountersTest, Empty) +{ +    const TString input = "{}"; + +    TJobCounters counters(NodeFromYsonString(input)); + +    EXPECT_EQ(counters.GetTotal(), 0u); + +    EXPECT_EQ(counters.GetCompleted().GetTotal(), 0u); +    EXPECT_EQ(counters.GetCompletedNonInterrupted().GetTotal(), 0u); +    EXPECT_EQ(counters.GetCompletedInterrupted().GetTotal(), 0u); +    EXPECT_EQ(counters.GetAborted().GetTotal(), 0u); +    EXPECT_EQ(counters.GetAbortedNonScheduled().GetTotal(), 0u); +    EXPECT_EQ(counters.GetAbortedScheduled().GetTotal(), 0u); +    EXPECT_EQ(counters.GetLost().GetTotal(), 0u); +    EXPECT_EQ(counters.GetInvalidated().GetTotal(), 0u); +    EXPECT_EQ(counters.GetFailed().GetTotal(), 0u); +    EXPECT_EQ(counters.GetRunning().GetTotal(), 0u); +    EXPECT_EQ(counters.GetSuspended().GetTotal(), 0u); +    EXPECT_EQ(counters.GetPending().GetTotal(), 0u); +    EXPECT_EQ(counters.GetBlocked().GetTotal(), 0u); +} + +TEST(TJobCountersTest, Broken) +{ +    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode())), yexception, "TJobCounters"); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode(1))), yexception, "TJobCounters"); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode(1.0))), yexception, "TJobCounters"); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR((TJobCounters(TNode("Whatever"))), yexception, "TJobCounters"); +} diff --git a/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp b/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp new file mode 100644 index 00000000000..90d40623c17 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/job_statistics_ut.cpp @@ -0,0 +1,254 @@ +#include <yt/cpp/mapreduce/interface/job_statistics.h> +#include <yt/cpp/mapreduce/interface/operation.h> + +#include <library/cpp/yson/node/node_io.h> + +#include <library/cpp/testing/gtest/gtest.h> + +using namespace NYT; + +TEST(TJobStatisticsTest, Simple) +{ +    const TString input = R"""( +        { +            "data" = { +                "output" = { +                    "0" = { +                        "uncompressed_data_size" = { +                            "$" = { +                                "completed" = { +                                    "simple_sort" = { +                                        "max" = 130; +                                        "count" = 1; +                                        "min" = 130; +                                        "sum" = 130; +                                    }; +                                    "map" = { +                                        "max" = 42; +                                        "count" = 1; +                                        "min" = 42; +                                        "sum" = 42; +                                    }; +                                }; +                                "aborted" = { +                                    "simple_sort" = { +                                        "max" = 24; +                                        "count" = 1; +                                        "min" = 24; +                                        "sum" = 24; +                                    }; +                                }; +                            }; +                        }; +                    }; +                }; +            }; +        })"""; + +    TJobStatistics stat(NodeFromYsonString(input)); + +    EXPECT_TRUE(stat.HasStatistics("data/output/0/uncompressed_data_size")); +    EXPECT_TRUE(!stat.HasStatistics("nonexistent-statistics")); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); + +    EXPECT_EQ(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); + +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); + +    EXPECT_EQ(stat.JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), 24); +    EXPECT_EQ(stat.JobType({EJobType::Map}).JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), TMaybe<i64>()); +} + +TEST(TJobStatisticsTest, OtherTypes) +{ +    const TString input = R"""( +    { +        "time" = { +            "exec" = { +                "$" = { +                    "completed" = { +                        "map" = { +                            "max" = 2482468; +                            "count" = 38; +                            "min" = 578976; +                            "sum" = 47987270; +                        }; +                    }; +                }; +            }; +        }; +    })"""; + +    TJobStatistics stat(NodeFromYsonString(input)); + +    EXPECT_EQ(stat.GetStatisticsAs<TDuration>("time/exec").Max(), TDuration::MilliSeconds(2482468)); +} + +TEST(TJobStatisticsTest, Custom) +{ +    const TString input = R"""( +        { +            "custom" = { +                "some" = { +                    "path" = { +                        "$" = { +                            "completed" = { +                                "map" = { +                                    "max" = -1; +                                    "count" = 1; +                                    "min" = -1; +                                    "sum" = -1; +                                }; +                            }; +                        }; +                    }; +                }; +                "another" = { +                    "path" = { +                        "$" = { +                            "completed" = { +                                "map" = { +                                    "max" = 1001; +                                    "count" = 2; +                                    "min" = 1001; +                                    "sum" = 2002; +                                }; +                            }; +                        }; +                    }; +                }; +            }; +        })"""; + +    TJobStatistics stat(NodeFromYsonString(input)); + +    EXPECT_TRUE(stat.HasCustomStatistics("some/path")); +    EXPECT_TRUE(!stat.HasCustomStatistics("nonexistent-statistics")); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR(stat.GetCustomStatistics("BLAH-BLAH"), yexception, "Statistics"); + +    const auto names = stat.GetCustomStatisticsNames(); +    const THashSet<TString> expected = {"some/path", "another/path"}; +    EXPECT_EQ(THashSet<TString>(names.begin(), names.end()), expected); + +    EXPECT_EQ(stat.GetCustomStatistics("some/path").Max(), -1); +    EXPECT_EQ(stat.GetCustomStatistics("another/path").Avg(), 1001); +} + +TEST(TJobStatisticsTest, TaskNames) +{ +    const TString input = R"""( +        { +            "data" = { +                "output" = { +                    "0" = { +                        "uncompressed_data_size" = { +                            "$" = { +                                "completed" = { +                                    "partition_map" = { +                                        "max" = 130; +                                        "count" = 1; +                                        "min" = 130; +                                        "sum" = 130; +                                    }; +                                    "partition(0)" = { +                                        "max" = 42; +                                        "count" = 1; +                                        "min" = 42; +                                        "sum" = 42; +                                    }; +                                }; +                                "aborted" = { +                                    "simple_sort" = { +                                        "max" = 24; +                                        "count" = 1; +                                        "min" = 24; +                                        "sum" = 24; +                                    }; +                                }; +                            }; +                        }; +                    }; +                }; +            }; +        })"""; + +    TJobStatistics stat(NodeFromYsonString(input)); + +    EXPECT_TRUE(stat.HasStatistics("data/output/0/uncompressed_data_size")); +    EXPECT_TRUE(!stat.HasStatistics("nonexistent-statistics")); +    EXPECT_THROW_MESSAGE_HAS_SUBSTR(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); + +    EXPECT_EQ(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); + +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); +    EXPECT_EQ(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); + +    EXPECT_EQ( +        stat +            .JobState({EJobState::Aborted}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        24); +    EXPECT_EQ( +        stat +            .JobType({EJobType::Partition}) +            .JobState({EJobState::Aborted}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        TMaybe<i64>()); +    EXPECT_EQ( +        stat +            .TaskName({"partition(0)"}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        42); +    EXPECT_EQ( +        stat +            .TaskName({"partition"}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        TMaybe<i64>()); +    EXPECT_EQ( +        stat +            .TaskName({"partition_map(0)"}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        130); +    EXPECT_EQ( +        stat +            .JobType({EJobType::Partition}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        42); +    EXPECT_EQ( +        stat +            .JobType({EJobType::PartitionMap}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        130); +    EXPECT_EQ( +        stat +            .TaskName({ETaskName::Partition0}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        42); +    EXPECT_EQ( +        stat +            .TaskName({ETaskName::Partition1}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        TMaybe<i64>()); +    EXPECT_EQ( +        stat +            .TaskName({ETaskName::PartitionMap0}) +            .GetStatistics("data/output/0/uncompressed_data_size") +            .Sum(), +        130); +} diff --git a/yt/cpp/mapreduce/interface/ut/operation_ut.cpp b/yt/cpp/mapreduce/interface/ut/operation_ut.cpp new file mode 100644 index 00000000000..81d03d06186 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/operation_ut.cpp @@ -0,0 +1,272 @@ +#include "common_ut.h" + +#include <yt/cpp/mapreduce/interface/job_statistics.h> +#include <yt/cpp/mapreduce/interface/operation.h> + +#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h> + +#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> + +#include <library/cpp/yson/node/node_io.h> + +#include <library/cpp/testing/gtest/gtest.h> + +using namespace NYT; +using namespace NYT::NUnitTesting; + +//////////////////////////////////////////////////////////////////// + +class TDummyInferenceContext +    : public IOperationPreparationContext +{ +public: +    TDummyInferenceContext(int inputCount, int outputCount) +        : InputCount_(inputCount) +        , OutputCount_(outputCount) +        , InputSchemas_(inputCount) +    { } + +    int GetInputCount() const override +    { +        return InputCount_; +    } + +    int GetOutputCount() const override +    { +        return OutputCount_; +    } + +    const TVector<TTableSchema>& GetInputSchemas() const override +    { +        return InputSchemas_; +    } + +    const TTableSchema& GetInputSchema(int index) const override +    { +        return InputSchemas_[index]; +    } + +    TMaybe<TYPath> GetInputPath(int) const override +    { +        return Nothing(); +    } + +    TMaybe<TYPath> GetOutputPath(int) const override +    { +        return Nothing(); +    } + +private: +    int InputCount_; +    int OutputCount_; +    TVector<TTableSchema> InputSchemas_; +}; + +//////////////////////////////////////////////////////////////////// + +TEST(TPrepareOperationTest, BasicSchemas) +{ +    auto firstSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); +    auto otherSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); +    auto thirdSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); + +    TDummyInferenceContext context(3,7); +    TJobOperationPreparer builder(context); + +    builder +        .OutputSchema(1, firstSchema) +        .BeginOutputGroup(TVector<int>{2, 5}) +            .Schema(otherSchema) +        .EndOutputGroup() +        .BeginOutputGroup(3, 5) +            .Schema(thirdSchema) +        .EndOutputGroup() +        .BeginOutputGroup(TVector<int>{0, 6}) +            .Schema(thirdSchema) +        .EndOutputGroup(); + +    EXPECT_THROW(builder.OutputSchema(1, otherSchema), TApiUsageError); +    EXPECT_THROW(builder.BeginOutputGroup(3, 5).Schema(otherSchema), TApiUsageError); +    EXPECT_THROW(builder.BeginOutputGroup(TVector<int>{3,6,7}).Schema(otherSchema), TApiUsageError); + +    builder.Finish(); +    auto result = builder.GetOutputSchemas(); + +    ASSERT_SERIALIZABLES_EQ(result[0], thirdSchema); +    ASSERT_SERIALIZABLES_EQ(result[1], firstSchema); +    ASSERT_SERIALIZABLES_EQ(result[2], otherSchema); +    ASSERT_SERIALIZABLES_EQ(result[3], thirdSchema); +    ASSERT_SERIALIZABLES_EQ(result[4], thirdSchema); +    ASSERT_SERIALIZABLES_EQ(result[5], otherSchema); +    ASSERT_SERIALIZABLES_EQ(result[6], thirdSchema); +} + +TEST(TPrepareOperationTest, NoSchema) +{ +    auto schema = TTableSchema() +        .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); + +    TDummyInferenceContext context(3,4); +    TJobOperationPreparer builder(context); + +    builder +        .OutputSchema(1, schema) +        .NoOutputSchema(0) +        .BeginOutputGroup(2, 4) +            .Schema(schema) +        .EndOutputGroup(); + +    EXPECT_THROW(builder.OutputSchema(0, schema), TApiUsageError); + +    builder.Finish(); +    auto result = builder.GetOutputSchemas(); + +    EXPECT_TRUE(result[0].Empty()); + +    ASSERT_SERIALIZABLES_EQ(result[1], schema); +    ASSERT_SERIALIZABLES_EQ(result[2], schema); +    ASSERT_SERIALIZABLES_EQ(result[3], schema); +} + +TEST(TPrepareOperationTest, Descriptions) +{ +    auto urlRowSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String()))) +        .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String()))) +        .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32()))); + +    auto urlRowStruct = NTi::Struct({ +        {"Host", NTi::Optional(NTi::String())}, +        {"Path", NTi::Optional(NTi::String())}, +        {"HttpCode", NTi::Optional(NTi::Int32())}, +    }); + +    auto rowFieldSerializationOptionSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String()))); + +    auto rowSerializedRepeatedFieldsSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64()))) +        .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct))); + +    TDummyInferenceContext context(5,7); +    TJobOperationPreparer builder(context); + +    builder +        .InputDescription<TUrlRow>(0) +        .BeginInputGroup(2, 3) +            .Description<TUrlRow>() +        .EndInputGroup() +        .BeginInputGroup(TVector<int>{1, 4}) +            .Description<TRowSerializedRepeatedFields>() +        .EndInputGroup() +        .InputDescription<TUrlRow>(3); + +    EXPECT_THROW(builder.InputDescription<TUrlRow>(0), TApiUsageError); + +    builder +        .OutputDescription<TUrlRow>(0, false) +        .OutputDescription<TRowFieldSerializationOption>(1) +        .BeginOutputGroup(2, 4) +            .Description<TUrlRow>() +        .EndOutputGroup() +        .BeginOutputGroup(TVector<int>{4,6}) +            .Description<TRowSerializedRepeatedFields>() +        .EndOutputGroup() +        .OutputDescription<TUrlRow>(5, false); + +    EXPECT_THROW(builder.OutputDescription<TUrlRow>(0), TApiUsageError); +    EXPECT_NO_THROW(builder.OutputSchema(0, urlRowSchema)); +    EXPECT_NO_THROW(builder.OutputSchema(5, urlRowSchema)); +    EXPECT_THROW(builder.OutputSchema(1, urlRowSchema), TApiUsageError); + +    builder.Finish(); +    auto result = builder.GetOutputSchemas(); + +    ASSERT_SERIALIZABLES_EQ(result[0], urlRowSchema); +    ASSERT_SERIALIZABLES_EQ(result[1], rowFieldSerializationOptionSchema); +    ASSERT_SERIALIZABLES_EQ(result[2], urlRowSchema); +    ASSERT_SERIALIZABLES_EQ(result[3], urlRowSchema); +    ASSERT_SERIALIZABLES_EQ(result[4], rowSerializedRepeatedFieldsSchema); +    ASSERT_SERIALIZABLES_EQ(result[5], urlRowSchema); +    ASSERT_SERIALIZABLES_EQ(result[6], rowSerializedRepeatedFieldsSchema); + +    auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{ +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, +    }; +    EXPECT_EQ(expectedInputDescriptions, builder.GetInputDescriptions()); + +    auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{ +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}}, +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, +        {TProtobufTableStructure{TUrlRow::descriptor()}}, +        {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, +    }; +    EXPECT_EQ(expectedOutputDescriptions, builder.GetOutputDescriptions()); +} + +TEST(TPrepareOperationTest, InputColumns) +{ +    TDummyInferenceContext context(5, 1); +    TJobOperationPreparer builder(context); +    builder +        .InputColumnFilter(2, {"a", "b"}) +        .BeginInputGroup(0, 2) +            .ColumnFilter({"b", "c"}) +            .ColumnRenaming({{"b", "B"}, {"c", "C"}}) +        .EndInputGroup() +        .InputColumnRenaming(3, {{"a", "AAA"}}) +        .NoOutputSchema(0); +    builder.Finish(); + +    auto expectedRenamings = TVector<THashMap<TString, TString>>{ +        {{"b", "B"}, {"c", "C"}}, +        {{"b", "B"}, {"c", "C"}}, +        {}, +        {{"a", "AAA"}}, +        {}, +    }; +    EXPECT_EQ(builder.GetInputColumnRenamings(), expectedRenamings); + +    auto expectedFilters = TVector<TMaybe<TVector<TString>>>{ +        {{"b", "c"}}, +        {{"b", "c"}}, +        {{"a", "b"}}, +        {}, +        {}, +    }; +    EXPECT_EQ(builder.GetInputColumnFilters(), expectedFilters); +} + +TEST(TPrepareOperationTest, Bug_r7349102) +{ +    auto firstSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); +    auto otherSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); +    auto thirdSchema = TTableSchema() +        .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); + +    TDummyInferenceContext context(3,1); +    TJobOperationPreparer builder(context); + +    builder +        .InputDescription<TUrlRow>(0) +        .InputDescription<TUrlRow>(1) +        .InputDescription<TUrlRow>(2) +        .OutputDescription<TUrlRow>(0); + +    builder.Finish(); +} + +//////////////////////////////////////////////////////////////////// diff --git a/yt/cpp/mapreduce/interface/ut/proto3_ut.proto b/yt/cpp/mapreduce/interface/ut/proto3_ut.proto new file mode 100644 index 00000000000..b24c13085bd --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/proto3_ut.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +import "yt/yt_proto/yt/formats/extension.proto"; + +package NYT.NTestingProto3; + +option (NYT.file_default_field_flags) = SERIALIZATION_YT; + +message TWithOptional +{ +    optional int64 x = 1; +} + +message TWithOptionalMessage +{ +    optional TWithOptional x = 1; +} diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp new file mode 100644 index 00000000000..abfe5bbfdce --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.cpp @@ -0,0 +1,270 @@ +#include "common_ut.h" + +#include <yt/cpp/mapreduce/interface/errors.h> +#include <yt/cpp/mapreduce/interface/format.h> + +#include <yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.pb.h> + +#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> + +#include <library/cpp/testing/gtest/gtest.h> + +using namespace NYT; + +namespace { + +NTi::TTypePtr GetUrlRowType(bool required) +{ +    static const NTi::TTypePtr structType = NTi::Struct({ +        {"Host", ToTypeV3(EValueType::VT_STRING, false)}, +        {"Path", ToTypeV3(EValueType::VT_STRING, false)}, +        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); +    return required ? structType : NTi::TTypePtr(NTi::Optional(structType)); +} + +} // namespace + +TEST(TProtobufFileOptionsTest, TRowFieldSerializationOption) +{ +    const auto schema = CreateTableSchema<NTestingFileOptions::TRowFieldSerializationOption>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); +} + +TEST(TProtobufFileOptionsTest, TRowMixedSerializationOptions) +{ +    const auto schema = CreateTableSchema<NTestingFileOptions::TRowMixedSerializationOptions>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); +} + +TEST(TProtobufFileOptionsTest, FieldSortOrder) +{ +    const auto schema = CreateTableSchema<NTestingFileOptions::TFieldSortOrder>(); + +    auto asInProtoFile = NTi::Optional(NTi::Struct({ +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +        {"z", NTi::Optional(NTi::Bool())}, +    })); +    auto byFieldNumber = NTi::Optional(NTi::Struct({ +        {"z", NTi::Optional(NTi::Bool())}, +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +    })); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(asInProtoFile)) +        .AddColumn(TColumnSchema().Name("EmbeddedAsInProtoFile").Type(asInProtoFile)) +        .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); +} + +TEST(TProtobufFileOptionsTest, Map) +{ +    const auto schema = CreateTableSchema<NTestingFileOptions::TWithMap>(); + +    auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { +        return NTi::List(NTi::Struct({ +            {"key", NTi::Optional(key)}, +            {"value", NTi::Optional(value)}, +        })); +    }; + +    auto embedded = NTi::Struct({ +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +    }); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("MapDefault") +            .Type(createKeyValueStruct(NTi::Int64(), embedded))) +        .AddColumn(TColumnSchema() +            .Name("MapDict") +            .Type(NTi::Dict(NTi::Int64(), embedded)))); +} + +TEST(TProtobufFileOptionsTest, Oneof) +{ +    const auto schema = CreateTableSchema<NTestingFileOptions::TWithOneof>(); + +    auto embedded = NTi::Struct({ +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +    }); + +    auto defaultVariantType = NTi::Optional(NTi::Struct({ +        {"field", NTi::Optional(NTi::String())}, +        {"Oneof2", NTi::Optional(NTi::Variant(NTi::Struct({ +            {"y2", NTi::String()}, +            {"z2", embedded}, +            {"x2", NTi::Int64()}, +        })))}, +        {"x1", NTi::Optional(NTi::Int64())}, +        {"y1", NTi::Optional(NTi::String())}, +        {"z1", NTi::Optional(embedded)}, +    })); + +    auto noDefaultType = NTi::Optional(NTi::Struct({ +        {"field", NTi::Optional(NTi::String())}, +        {"y2", NTi::Optional(NTi::String())}, +        {"z2", NTi::Optional(embedded)}, +        {"x2", NTi::Optional(NTi::Int64())}, +        {"x1", NTi::Optional(NTi::Int64())}, +        {"y1", NTi::Optional(NTi::String())}, +        {"z1", NTi::Optional(embedded)}, +    })); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("DefaultVariant") +            .Type(defaultVariantType) +        ) +        .AddColumn(TColumnSchema() +            .Name("NoDefault") +            .Type(noDefaultType) +        ) +        .AddColumn(TColumnSchema() +            .Name("SerializationProtobuf") +            .Type(NTi::Optional(NTi::Struct({ +                {"x1", NTi::Optional(NTi::Int64())}, +                {"y1", NTi::Optional(NTi::String())}, +                {"z1", NTi::Optional(NTi::String())}, +            }))) +        ) +        .AddColumn(TColumnSchema() +            .Name("MemberOfTopLevelOneof") +            .Type(NTi::Optional(NTi::Int64())) +        ) +    ); +} + +static TNode GetColumns(const TFormat& format, int tableIndex = 0) +{ +    return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; +} + +TEST(TProtobufFormatFileOptionsTest, TRowFieldSerializationOption) +{ +    const auto format = TFormat::Protobuf<NTestingFileOptions::TRowFieldSerializationOption>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns[0]["name"], "UrlRow_1"); +    EXPECT_EQ(columns[0]["proto_type"], "message"); +    EXPECT_EQ(columns[0]["field_number"], 1); + +    EXPECT_EQ(columns[1]["name"], "UrlRow_2"); +    EXPECT_EQ(columns[1]["proto_type"], "structured_message"); +    EXPECT_EQ(columns[1]["field_number"], 2); +    const auto& fields = columns[1]["fields"]; +    EXPECT_EQ(fields[0]["name"], "Host"); +    EXPECT_EQ(fields[0]["proto_type"], "string"); +    EXPECT_EQ(fields[0]["field_number"], 1); + +    EXPECT_EQ(fields[1]["name"], "Path"); +    EXPECT_EQ(fields[1]["proto_type"], "string"); +    EXPECT_EQ(fields[1]["field_number"], 2); + +    EXPECT_EQ(fields[2]["name"], "HttpCode"); +    EXPECT_EQ(fields[2]["proto_type"], "sint32"); +    EXPECT_EQ(fields[2]["field_number"], 3); +} + +TEST(TProtobufFormatFileOptionsTest, Map) +{ +    const auto format = TFormat::Protobuf<NTestingFileOptions::TWithMap>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns.Size(), 2u); +    { +        const auto& column = columns[0]; +        EXPECT_EQ(column["name"], "MapDefault"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message"); +    } +    { +        const auto& column = columns[1]; +        EXPECT_EQ(column["name"], "MapDict"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 2u); +        EXPECT_EQ(column["fields"][0]["proto_type"], "int64"); +        EXPECT_EQ(column["fields"][1]["proto_type"], "structured_message"); +    } +} + +TEST(TProtobufFormatFileOptionsTest, Oneof) +{ +    const auto format = TFormat::Protobuf<NTestingFileOptions::TWithOneof>(); +    auto columns = GetColumns(format); + +    EXPECT_EQ(columns.Size(), 4u); + +    { +        const auto& column = columns[0]; +        EXPECT_EQ(column["name"], "DefaultVariant"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 5u); +        EXPECT_EQ(column["fields"][0]["name"], "field"); + +        const auto& oneof2 = column["fields"][1]; +        EXPECT_EQ(oneof2["name"], "Oneof2"); +        EXPECT_EQ(oneof2["proto_type"], "oneof"); +        EXPECT_EQ(oneof2["fields"][0]["name"], "y2"); +        EXPECT_EQ(oneof2["fields"][1]["name"], "z2"); +        EXPECT_EQ(oneof2["fields"][1]["proto_type"], "structured_message"); +        const auto& embeddedFields = oneof2["fields"][1]["fields"]; +        EXPECT_EQ(embeddedFields[0]["name"], "x"); +        EXPECT_EQ(embeddedFields[1]["name"], "y"); + +        EXPECT_EQ(oneof2["fields"][2]["name"], "x2"); + +        EXPECT_EQ(column["fields"][2]["name"], "x1"); +        EXPECT_EQ(column["fields"][3]["name"], "y1"); +        EXPECT_EQ(column["fields"][4]["name"], "z1"); +    }; + +    { +        const auto& column = columns[1]; +        EXPECT_EQ(column["name"], "NoDefault"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        const auto& fields = column["fields"]; +        EXPECT_EQ(fields.Size(), 7u); + +        EXPECT_EQ(fields[0]["name"], "field"); + +        EXPECT_EQ(fields[1]["name"], "y2"); + +        EXPECT_EQ(fields[2]["name"], "z2"); +        EXPECT_EQ(fields[2]["proto_type"], "structured_message"); +        const auto& embeddedFields = fields[2]["fields"]; +        EXPECT_EQ(embeddedFields[0]["name"], "x"); +        EXPECT_EQ(embeddedFields[1]["name"], "y"); + +        EXPECT_EQ(fields[3]["name"], "x2"); + +        EXPECT_EQ(fields[4]["name"], "x1"); +        EXPECT_EQ(fields[5]["name"], "y1"); +        EXPECT_EQ(fields[6]["name"], "z1"); +    }; + +    { +        const auto& column = columns[2]; +        EXPECT_EQ(column["name"], "SerializationProtobuf"); +        EXPECT_EQ(column["proto_type"], "structured_message"); +        EXPECT_EQ(column["fields"].Size(), 3u); +        EXPECT_EQ(column["fields"][0]["name"], "x1"); +        EXPECT_EQ(column["fields"][1]["name"], "y1"); +        EXPECT_EQ(column["fields"][2]["name"], "z1"); +    } +    { +        const auto& column = columns[3]; +        EXPECT_EQ(column["name"], "MemberOfTopLevelOneof"); +        EXPECT_EQ(column["proto_type"], "int64"); +    } +} diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto new file mode 100644 index 00000000000..4804b2f60c1 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/protobuf_file_options_ut.proto @@ -0,0 +1,142 @@ +import "yt/yt_proto/yt/formats/extension.proto"; + +package NYT.NTestingFileOptions; + +option (NYT.file_default_field_flags) = SERIALIZATION_YT; +option (NYT.file_default_field_flags) = MAP_AS_LIST_OF_STRUCTS; +option (NYT.file_default_message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; +option (NYT.file_default_oneof_flags) = SEPARATE_FIELDS; + +message TUrlRow +{ +    optional string Host = 1     [(NYT.column_name) = "Host"]; +    optional string Path = 2     [(NYT.column_name) = "Path"]; +    optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; +} + +message TRowFieldSerializationOption +{ +    optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +    optional TUrlRow UrlRow_2 = 2; +} + +message TRowMixedSerializationOptions +{ +    option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; +    optional TUrlRow UrlRow_1 = 1; +    optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_YT]; +} + +message TRowSerializedRepeatedFields +{ +    repeated int64 Ints = 1; +    repeated TUrlRow UrlRows = 2; +} + +message TFieldSortOrder +{ +    message TEmbeddedDefault { +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    message TEmbeddedAsInProtoFile { +        option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    message TEmbeddedByFieldNumber { +        option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    option (NYT.default_field_flags) = SERIALIZATION_YT; + +    optional TEmbeddedDefault EmbeddedDefault = 1; +    optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; +    optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; +} + +message TWithMap +{ +    message TEmbedded { +        optional int64 x = 1; +        optional string y = 2; +    } + +    map<int64, TEmbedded> MapDefault = 1; +    map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; +} + +message TWithOneof +{ +    message TEmbedded +    { +        oneof Oneof { +            int64 x = 1; +            string y = 2; +        } +    } + +    message TDefaultVariant +    { +        option (NYT.default_oneof_flags) = VARIANT; +        optional string field = 1; + +        oneof Oneof2 +        { +            string y2 = 4; +            TEmbedded z2 = 6; +            int64 x2 = 2; +        } + +        oneof Oneof1 +        { +            option (NYT.oneof_flags) = SEPARATE_FIELDS; +            int64 x1 = 10; +            string y1 = 3; +            TEmbedded z1 = 5; +        } +    } + +    message TNoDefault +    { +        optional string field = 1; + +        oneof Oneof2 +        { +            string y2 = 4; +            TEmbedded z2 = 6; +            int64 x2 = 2; +        } + +        oneof Oneof1 +        { +            int64 x1 = 10; +            string y1 = 3; +            TEmbedded z1 = 5; +        } +    } + +    message TSerializationProtobuf +    { +        option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; +        oneof Oneof +        { +            int64 x1 = 2; +            string y1 = 1; +            TEmbedded z1 = 3; +        } +    } + +    optional TDefaultVariant DefaultVariant = 1; +    optional TNoDefault NoDefault = 2; +    optional TSerializationProtobuf SerializationProtobuf = 3; + +    oneof TopLevelOneof +    { +        int64 MemberOfTopLevelOneof = 4; +    } +} diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp new file mode 100644 index 00000000000..d7bee1e6d20 --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.cpp @@ -0,0 +1,444 @@ +#include "common_ut.h" + +#include <yt/cpp/mapreduce/interface/common.h> +#include <yt/cpp/mapreduce/interface/errors.h> + +#include <yt/cpp/mapreduce/interface/ut/proto3_ut.pb.h> +#include <yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.pb.h> + +#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> + +#include <library/cpp/testing/gtest/gtest.h> + +#include <util/generic/fwd.h> + +#include <algorithm> + +using namespace NYT; + +bool IsFieldPresent(const TTableSchema& schema, TStringBuf name) +{ +    for (const auto& field : schema.Columns()) { +        if (field.Name() == name) { +            return true; +        } +    } +    return false; +} + +TEST(TProtoSchemaSimpleTest, TIntegral) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TIntegral>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) +        .AddColumn(TColumnSchema().Name("FloatField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) +        .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("Int64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) +        .AddColumn(TColumnSchema().Name("Uint32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) +        .AddColumn(TColumnSchema().Name("Uint64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) +        .AddColumn(TColumnSchema().Name("Sint32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("Sint64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) +        .AddColumn(TColumnSchema().Name("Fixed32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) +        .AddColumn(TColumnSchema().Name("Fixed64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) +        .AddColumn(TColumnSchema().Name("Sfixed32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("Sfixed64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) +        .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false))) +        .AddColumn(TColumnSchema().Name("EnumField").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, TOneOf) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TOneOf>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) +        .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false)))); +} + +TEST(TProtoSchemaSimpleTest, TWithRequired) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TWithRequired>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("RequiredField").Type(ToTypeV3(EValueType::VT_STRING, true))) +        .AddColumn(TColumnSchema().Name("NotRequiredField").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, TAggregated) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TAggregated>(); + +    EXPECT_EQ(6u, schema.Columns().size()); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("StringField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("BytesField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("NestedField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("NestedRepeatedField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("NestedOneOfField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("NestedRecursiveField").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, TAliased) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TAliased>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("subkey").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) +        .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, SortColumns) +{ +    const TSortColumns keys = {"key", "subkey"}; + +    const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("key") +            .Type(ToTypeV3(EValueType::VT_INT32, false)) +            .SortOrder(ESortOrder::SO_ASCENDING)) +        .AddColumn(TColumnSchema() +            .Name("subkey") +            .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) +            .SortOrder(ESortOrder::SO_ASCENDING)) +        .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, SortColumnsReordered) +{ +    const TSortColumns keys = {"subkey"}; + +    const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("subkey") +            .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) +            .SortOrder(ESortOrder::SO_ASCENDING)) +        .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) +        .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaSimpleTest, SortColumnsInvalid) +{ +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TAliased>({"subkey", "subkey"}), yexception); +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TAliased>({"key", "junk"}), yexception); +} + +TEST(TProtoSchemaSimpleTest, KeepFieldsWithoutExtensionTrue) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, true); +    EXPECT_TRUE(IsFieldPresent(schema, "key")); +    EXPECT_TRUE(IsFieldPresent(schema, "subkey")); +    EXPECT_TRUE(IsFieldPresent(schema, "Data")); +    EXPECT_TRUE(schema.Strict()); +} + +TEST(TProtoSchemaSimpleTest, KeepFieldsWithoutExtensionFalse) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, false); +    EXPECT_TRUE(IsFieldPresent(schema, "key")); +    EXPECT_TRUE(IsFieldPresent(schema, "subkey")); +    EXPECT_TRUE(!IsFieldPresent(schema, "Data")); +    EXPECT_TRUE(schema.Strict()); +} + +TEST(TProtoSchemaSimpleTest, ProtobufTypeOption) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TWithTypeOptions>({}); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .Strict(false) +        .AddColumn(TColumnSchema().Name("ColorIntField").Type(ToTypeV3(EValueType::VT_INT64, false))) +        .AddColumn(TColumnSchema().Name("ColorStringField").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("AnyField").Type(ToTypeV3(EValueType::VT_ANY, false))) +        .AddColumn(TColumnSchema().Name("EmbeddedField").Type( +            NTi::Optional(NTi::Struct({ +                {"ColorIntField", ToTypeV3(EValueType::VT_INT64, false)}, +                {"ColorStringField", ToTypeV3(EValueType::VT_STRING, false)}, +                {"AnyField", ToTypeV3(EValueType::VT_ANY, false)}})))) +        .AddColumn(TColumnSchema().Name("RepeatedEnumIntField").Type(NTi::List(NTi::Int64())))); +} + +TEST(TProtoSchemaSimpleTest, ProtobufTypeOption_TypeMismatch) +{ +    EXPECT_THROW( +        CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumInt>({}), +        yexception); +    EXPECT_THROW( +        CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumString>({}), +        yexception); +    EXPECT_THROW( +        CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_Any>({}), +        yexception); +    EXPECT_THROW( +        CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_OtherColumns>({}), +        yexception); +} + +NTi::TTypePtr GetUrlRowType_ColumnNames(bool required) +{ +    static const NTi::TTypePtr type = NTi::Struct({ +        {"Host_ColumnName", ToTypeV3(EValueType::VT_STRING, false)}, +        {"Path_KeyColumnName", ToTypeV3(EValueType::VT_STRING, false)}, +        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}, +    }); +    return required ? type : NTi::TTypePtr(NTi::Optional(type)); +} + +TEST(TProtoSchemaComplexTest, TRepeated) +{ +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TRepeated>(), yexception); + +    const auto schema = CreateTableSchema<NUnitTesting::TRepeatedYtMode>(); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("Int32Field").Type(NTi::List(ToTypeV3(EValueType::VT_INT32, true))))); +} + +TEST(TProtoSchemaComplexTest, TRepeatedOptionalList) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TOptionalList>(); +    auto type = NTi::Optional(NTi::List(NTi::Int64())); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("OptionalListInt64").TypeV3(type))); +} + +NTi::TTypePtr GetUrlRowType(bool required) +{ +    static const NTi::TTypePtr structType = NTi::Struct({ +        {"Host", ToTypeV3(EValueType::VT_STRING, false)}, +        {"Path", ToTypeV3(EValueType::VT_STRING, false)}, +        {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); +    return required ? structType : NTi::TTypePtr(NTi::Optional(structType)); +} + +TEST(TProtoSchemaComplexTest, TRowFieldSerializationOption) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TRowFieldSerializationOption>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaComplexTest, TRowMessageSerializationOption) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TRowMessageSerializationOption>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); +} + +TEST(TProtoSchemaComplexTest, TRowMixedSerializationOptions) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaComplexTest, TRowMixedSerializationOptions_ColumnNames) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions_ColumnNames>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType_ColumnNames(false))) +        .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaComplexTest, NoOptionInheritance) +{ +    auto deepestEmbedded = NTi::Optional(NTi::Struct({{"x", ToTypeV3(EValueType::VT_INT64, false)}})); + +    const auto schema = CreateTableSchema<NUnitTesting::TNoOptionInheritance>(); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("EmbeddedYt_YtOption") +            .Type(NTi::Optional(NTi::Struct({{"embedded", deepestEmbedded}})))) +        .AddColumn(TColumnSchema().Name("EmbeddedYt_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("EmbeddedYt_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema() +            .Name("EmbeddedProtobuf_YtOption") +            .Type(NTi::Optional(NTi::Struct({{"embedded",  ToTypeV3(EValueType::VT_STRING, false)}})))) +        .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema() +            .Name("Embedded_YtOption") +            .Type(NTi::Optional(NTi::Struct({{"embedded",  ToTypeV3(EValueType::VT_STRING, false)}})))) +        .AddColumn(TColumnSchema().Name("Embedded_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) +        .AddColumn(TColumnSchema().Name("Embedded_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false)))); +} + +TEST(TProtoSchemaComplexTest, Cyclic) +{ +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic>(), TApiUsageError); +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TA>(), TApiUsageError); +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TB>(), TApiUsageError); +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TC>(), TApiUsageError); +    EXPECT_THROW(CreateTableSchema<NUnitTesting::TCyclic::TD>(), TApiUsageError); + +    ASSERT_SERIALIZABLES_EQ( +        TTableSchema().AddColumn( +            TColumnSchema().Name("d").TypeV3(NTi::Optional(NTi::String()))), +        CreateTableSchema<NUnitTesting::TCyclic::TE>()); +} + +TEST(TProtoSchemaComplexTest, FieldSortOrder) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TFieldSortOrder>(); + +    auto byFieldNumber = NTi::Optional(NTi::Struct({ +        {"z", NTi::Optional(NTi::Bool())}, +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +    })); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(byFieldNumber)) +        .AddColumn(TColumnSchema() +            .Name("EmbeddedAsInProtoFile") +            .Type(NTi::Optional(NTi::Struct({ +                {"x", NTi::Optional(NTi::Int64())}, +                {"y", NTi::Optional(NTi::String())}, +                {"z", NTi::Optional(NTi::Bool())}, +            })))) +        .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); +} + +TEST(TProtoSchemaComplexTest, Map) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TWithMap>(); + +    auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { +        return NTi::List(NTi::Struct({ +            {"key", NTi::Optional(key)}, +            {"value", NTi::Optional(value)}, +        })); +    }; + +    auto embedded = NTi::Struct({ +        {"x", NTi::Optional(NTi::Int64())}, +        {"y", NTi::Optional(NTi::String())}, +    }); + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("MapDefault") +            .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) +        .AddColumn(TColumnSchema() +            .Name("MapListOfStructsLegacy") +            .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) +        .AddColumn(TColumnSchema() +            .Name("MapListOfStructs") +            .Type(createKeyValueStruct(NTi::Int64(), embedded))) +        .AddColumn(TColumnSchema() +            .Name("MapOptionalDict") +            .Type(NTi::Optional(NTi::Dict(NTi::Int64(), embedded)))) +        .AddColumn(TColumnSchema() +            .Name("MapDict") +            .Type(NTi::Dict(NTi::Int64(), embedded)))); +} + +TEST(TProtoSchemaComplexTest, Oneof) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TWithOneof>(); + +    auto embedded = NTi::Struct({ +        {"Oneof", NTi::Optional(NTi::Variant(NTi::Struct({ +            {"x", NTi::Int64()}, +            {"y", NTi::String()}, +        })))}, +    }); + +    auto createType = [&] (TString oneof2Name) { +        return NTi::Optional(NTi::Struct({ +            {"field", NTi::Optional(NTi::String())}, +            {oneof2Name, NTi::Optional(NTi::Variant(NTi::Struct({ +                {"x2", NTi::Int64()}, +                {"y2", NTi::String()}, +                {"z2", embedded}, +            })))}, +            {"y1", NTi::Optional(NTi::String())}, +            {"z1", NTi::Optional(embedded)}, +            {"x1", NTi::Optional(NTi::Int64())}, +        })); +    }; + +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("DefaultSeparateFields") +            .Type(createType("variant_field_name"))) +        .AddColumn(TColumnSchema() +            .Name("NoDefault") +            .Type(createType("Oneof2"))) +        .AddColumn(TColumnSchema() +            .Name("SerializationProtobuf") +            .Type(NTi::Optional(NTi::Struct({ +                {"y1", NTi::Optional(NTi::String())}, +                {"x1", NTi::Optional(NTi::Int64())}, +                {"z1", NTi::Optional(NTi::String())}, +            })))) +        .AddColumn(TColumnSchema() +            .Name("TopLevelOneof") +            .Type( +                NTi::Optional( +                    NTi::Variant(NTi::Struct({ +                        {"MemberOfTopLevelOneof", NTi::Int64()} +                    })) +                ) +            )) +    ); +} + +TEST(TProtoSchemaComplexTest, Embedded) +{ +    const auto schema = CreateTableSchema<NUnitTesting::TEmbeddingMessage>(); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .Strict(false) +        .AddColumn(TColumnSchema().Name("embedded2_num").Type(NTi::Optional(NTi::Uint64()))) +        .AddColumn(TColumnSchema().Name("embedded2_struct").Type(NTi::Optional(NTi::Struct({ +            {"float1", NTi::Optional(NTi::Double())}, +            {"string1", NTi::Optional(NTi::String())}, +        })))) +        .AddColumn(TColumnSchema().Name("embedded2_repeated").Type(NTi::List(NTi::String()))) +        .AddColumn(TColumnSchema().Name("embedded_num").Type(NTi::Optional(NTi::Uint64()))) +        .AddColumn(TColumnSchema().Name("embedded_extra_field").Type(NTi::Optional(NTi::String()))) +        .AddColumn(TColumnSchema().Name("variant").Type(NTi::Optional(NTi::Variant(NTi::Struct({ +            {"str_variant", NTi::String()}, +            {"uint_variant", NTi::Uint64()}, +        }))))) +        .AddColumn(TColumnSchema().Name("num").Type(NTi::Optional(NTi::Uint64()))) +        .AddColumn(TColumnSchema().Name("extra_field").Type(NTi::Optional(NTi::String()))) +    ); +} + +TEST(TProtoSchemaProto3Test, TWithOptional) +{ +    const auto schema = CreateTableSchema<NTestingProto3::TWithOptional>(); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("x").Type(NTi::Optional(NTi::Int64())) +        ) +    ); +} + +TEST(TProtoSchemaProto3Test, TWithOptionalMessage) +{ +    const auto schema = CreateTableSchema<NTestingProto3::TWithOptionalMessage>(); +    ASSERT_SERIALIZABLES_EQ(schema, TTableSchema() +        .AddColumn(TColumnSchema() +            .Name("x").Type( +                NTi::Optional( +                    NTi::Struct({{"x", NTi::Optional(NTi::Int64())}}) +                ) +            ) +        ) +    ); +} diff --git a/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto new file mode 100644 index 00000000000..da1e48f691b --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/protobuf_table_schema_ut.proto @@ -0,0 +1,402 @@ +import "yt/yt_proto/yt/formats/extension.proto"; + +package NYT.NUnitTesting; + +message TIntegral +{ +    optional double DoubleField = 1; +    optional float FloatField = 2; +    optional int32 Int32Field = 3; +    optional int64 Int64Field = 4; +    optional uint32 Uint32Field = 5; +    optional uint64 Uint64Field = 6; +    optional sint32 Sint32Field = 7; +    optional sint64 Sint64Field = 8; +    optional fixed32 Fixed32Field = 9; +    optional fixed64 Fixed64Field = 10; +    optional sfixed32 Sfixed32Field = 11; +    optional sfixed64 Sfixed64Field = 12; +    optional bool BoolField = 13; +    enum TriBool +    { +        TRI_FALSE = 0; +        TRI_TRUE = 1; +        TRI_UNDEF = -1; +    } +    optional TriBool EnumField = 14; +} + +message TRepeated +{ +    repeated int32 Int32Field = 1; +} + +message TRepeatedYtMode +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    repeated int32 Int32Field = 1; +} + +message TWithTypeOptions +{ +    enum Color +    { +        WHITE = 0; +        BLUE = 1; +        RED = -1; +    } + +    message TEmbedded +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; + +        optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; +        optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; +        optional bytes AnyField = 3 [(NYT.flags) = ANY]; +    } + +    optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; +    optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; +    optional bytes AnyField = 3 [(NYT.flags) = ANY]; +    optional bytes OtherColumnsField = 4 [(NYT.flags) = OTHER_COLUMNS]; +    optional TEmbedded EmbeddedField = 5 [(NYT.flags) = SERIALIZATION_YT]; +    repeated Color RepeatedEnumIntField = 6 [(NYT.flags) = SERIALIZATION_YT, (NYT.flags) = ENUM_INT]; +} + +message TWithTypeOptions_TypeMismatch_EnumInt +{ +    optional int64 EnumField = 1 [(NYT.flags) = ENUM_INT]; +} + +message TWithTypeOptions_TypeMismatch_EnumString +{ +    optional string EnumField = 1 [(NYT.flags) = ENUM_STRING]; +} + +message TWithTypeOptions_TypeMismatch_Any +{ +    optional string AnyField = 1 [(NYT.flags) = ANY]; +} + +message TWithTypeOptions_TypeMismatch_OtherColumns +{ +    optional string OtherColumnsField = 1 [(NYT.flags) = OTHER_COLUMNS]; +} + +message TOneOf +{ +    oneof Chooser +    { +        double DoubleField = 1; +        int32 Int32Field = 2; +    } +    optional bool BoolField = 3; +} + +message TWithRequired +{ +    required string RequiredField = 1; +    optional string NotRequiredField = 2; +}; + +message TAggregated +{ +    optional string StringField = 1; +    optional bytes BytesField = 2; +    optional TIntegral NestedField = 3; +    optional TRepeated NestedRepeatedField = 4; +    optional TOneOf NestedOneOfField = 5; +    optional TAggregated NestedRecursiveField = 6; +} + +message TAliased +{ +    optional int32 Key = 1         [(NYT.key_column_name) = "key"]; +    optional double Subkey = 2     [(NYT.key_column_name) = "subkey"]; +    optional TAggregated Data = 3; +} + +//////////////////////////////////////////////////////////////////////////////// + +message TUrlRow +{ +    optional string Host = 1     [(NYT.column_name) = "Host"]; +    optional string Path = 2     [(NYT.column_name) = "Path"]; +    optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; +} + +message TRowFieldSerializationOption +{ +    optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_YT]; +    optional TUrlRow UrlRow_2 = 2; +} + +message TRowMessageSerializationOption +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    optional TUrlRow UrlRow_1 = 1; +    optional TUrlRow UrlRow_2 = 2; +} + +message TRowMixedSerializationOptions +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    optional TUrlRow UrlRow_1 = 1; +    optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +} + +message TRowSerializedRepeatedFields +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    repeated int64 Ints = 1; +    repeated TUrlRow UrlRows = 2; +} + +message TUrlRowWithColumnNames +{ +    optional string Host = 1     [(NYT.column_name) = "Host_ColumnName", (NYT.key_column_name) = "Host_KeyColumnName"]; +    optional string Path = 2     [(NYT.key_column_name) = "Path_KeyColumnName"]; +    optional sint32 HttpCode = 3; +} + +message TRowMixedSerializationOptions_ColumnNames +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    optional TUrlRowWithColumnNames UrlRow_1 = 1; +    optional TUrlRowWithColumnNames UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +} + +message TNoOptionInheritance +{ +    message TDeepestEmbedded +    { +        optional int64 x = 1; +    } + +    message TEmbedded +    { +        optional TDeepestEmbedded embedded = 1; +    } + +    message TEmbeddedYt +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; + +        optional TDeepestEmbedded embedded = 1; +    } + +    message TEmbeddedProtobuf +    { +        option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; + +        optional TDeepestEmbedded embedded = 1; +    } + +    optional TEmbeddedYt EmbeddedYt_YtOption = 1 [(NYT.flags) = SERIALIZATION_YT]; +    optional TEmbeddedYt EmbeddedYt_ProtobufOption = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +    optional TEmbeddedYt EmbeddedYt_NoOption = 3; +    optional TEmbeddedProtobuf EmbeddedProtobuf_YtOption = 4 [(NYT.flags) = SERIALIZATION_YT]; +    optional TEmbeddedProtobuf EmbeddedProtobuf_ProtobufOption = 5 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +    optional TEmbeddedProtobuf EmbeddedProtobuf_NoOption = 6; +    optional TEmbedded Embedded_YtOption = 7 [(NYT.flags) = SERIALIZATION_YT]; +    optional TEmbedded Embedded_ProtobufOption = 8 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +    optional TEmbedded Embedded_NoOption = 9; +} + +message TOptionalList +{ +    repeated int64 OptionalListInt64 = 1 [(NYT.flags) = OPTIONAL_LIST, (NYT.flags) = SERIALIZATION_YT]; +} + +message TPacked +{ +    repeated int64 PackedListInt64 = 1 [(NYT.flags) = SERIALIZATION_YT, packed=true]; +} + +message TCyclic +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; + +    message TA +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; +        repeated TB b = 1; +        optional TC c = 2; +    } + +    message TB +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; +        optional TD d = 1; +    } + +    message TC +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; +        optional TD d = 1; +    } + +    message TD +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; +        optional TA a = 1; +    } + +    message TE +    { +        optional TD d = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; +    } + +    optional TA a = 1; +} + +message TFieldSortOrder +{ +    message TEmbeddedDefault { +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    message TEmbeddedAsInProtoFile { +        option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    message TEmbeddedByFieldNumber { +        option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; +        optional int64 x = 2; +        optional string y = 12; +        optional bool z = 1; +    } +    option (NYT.default_field_flags) = SERIALIZATION_YT; + +    optional TEmbeddedDefault EmbeddedDefault = 1; +    optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; +    optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; +} + +message TWithMap +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; + +    message TEmbedded { +        optional int64 x = 1; +        optional string y = 2; +    } + +    map<int64, TEmbedded> MapDefault = 1; +    map<int64, TEmbedded> MapListOfStructsLegacy = 2 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY]; +    map<int64, TEmbedded> MapListOfStructs = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS]; +    map<int64, TEmbedded> MapOptionalDict = 4 [(NYT.flags) = MAP_AS_OPTIONAL_DICT]; +    map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; +} + +message TWithOneof +{ +    option (NYT.default_field_flags) = SERIALIZATION_YT; + +    message TEmbedded +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; +        oneof Oneof { +            int64 x = 1; +            string y = 2; +        } +    } + +    message TDefaultSeparateFields +    { +        option (NYT.default_oneof_flags) = SEPARATE_FIELDS; +        option (NYT.default_field_flags) = SERIALIZATION_YT; + +        optional string field = 1; + +        oneof Oneof2 +        { +            option (NYT.variant_field_name) = "variant_field_name"; +            option (NYT.oneof_flags) = VARIANT; +            string y2 = 4; +            TEmbedded z2 = 6; +            int64 x2 = 2; +        } + +        oneof Oneof1 +        { +            int64 x1 = 10; +            string y1 = 3; +            TEmbedded z1 = 5; +        } +    } + +    message TNoDefault +    { +        option (NYT.default_field_flags) = SERIALIZATION_YT; + +        optional string field = 1; + +        oneof Oneof2 +        { +            string y2 = 4; +            TEmbedded z2 = 6; +            int64 x2 = 2; +        } + +        oneof Oneof1 +        { +            option (NYT.oneof_flags) = SEPARATE_FIELDS; +            int64 x1 = 10; +            string y1 = 3; +            TEmbedded z1 = 5; +        } +    } + +    message TSerializationProtobuf +    { +        oneof Oneof +        { +            int64 x1 = 2; +            string y1 = 1; +            TEmbedded z1 = 3; +        } +    } + +    optional TDefaultSeparateFields DefaultSeparateFields = 1; +    optional TNoDefault NoDefault = 2; +    optional TSerializationProtobuf SerializationProtobuf = 3; + +    oneof TopLevelOneof +    { +        int64 MemberOfTopLevelOneof = 4; +    } +} + +message TEmbeddedStruct { +    optional float float1 = 1; +    optional string string1 = 2; +} + +message TEmbedded2Message { +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    optional uint64 embedded2_num = 10; +    optional TEmbeddedStruct embedded2_struct = 17; +    repeated string embedded2_repeated = 42; +} + +message TEmbedded1Message { +    option (NYT.default_field_flags) = SERIALIZATION_YT; +    required TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED]; +    oneof variant { +        string str_variant = 101; +        uint64 uint_variant = 102; +    } +    optional uint64 embedded_num = 10; // make intentional field_num collision! +    optional string embedded_extra_field = 11; +} + +message TEmbeddingMessage { +    optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS]; +    required TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED]; +    optional uint64 num = 12; +    optional string extra_field = 13; +} diff --git a/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp b/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp new file mode 100644 index 00000000000..0acec154d4e --- /dev/null +++ b/yt/cpp/mapreduce/interface/ut/serialize_ut.cpp @@ -0,0 +1,46 @@ +#include <yt/cpp/mapreduce/interface/serialize.h> +#include <yt/cpp/mapreduce/interface/common.h> + +#include <library/cpp/yson/node/node_builder.h> + +#include <library/cpp/testing/gtest/gtest.h> + +#include <util/generic/serialized_enum.h> + +using namespace NYT; + +TEST(TSerializationTest, TableSchema) +{ +    auto schema = TTableSchema() +        .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) +        .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) +        .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64, true)); + +    auto schemaNode = schema.ToNode(); +    EXPECT_TRUE(schemaNode.IsList()); +    EXPECT_EQ(schemaNode.Size(), 3u); + + +    EXPECT_EQ(schemaNode[0]["name"], "a"); +    EXPECT_EQ(schemaNode[0]["type"], "string"); +    EXPECT_EQ(schemaNode[0]["required"], false); +    EXPECT_EQ(schemaNode[0]["sort_order"], "ascending"); + +    EXPECT_EQ(schemaNode[1]["name"], "b"); +    EXPECT_EQ(schemaNode[1]["type"], "uint64"); +    EXPECT_EQ(schemaNode[1]["required"], false); + +    EXPECT_EQ(schemaNode[2]["name"], "c"); +    EXPECT_EQ(schemaNode[2]["type"], "int64"); +    EXPECT_EQ(schemaNode[2]["required"], true); +} + +TEST(TSerializationTest, ValueTypeSerialization) +{ +    for (const auto value : GetEnumAllValues<EValueType>()) { +        TNode serialized = NYT::NDetail::ToString(value); +        EValueType deserialized; +        Deserialize(deserialized, serialized); +        EXPECT_EQ(value, deserialized); +    } +} diff --git a/yt/cpp/mapreduce/interface/ut/ya.make b/yt/cpp/mapreduce/interface/ut/ya.make index 0219e6430ca..9e92931b5de 100644 --- a/yt/cpp/mapreduce/interface/ut/ya.make +++ b/yt/cpp/mapreduce/interface/ut/ya.make @@ -1,4 +1,4 @@ -UNITTEST_FOR(yt/cpp/mapreduce/interface) +GTEST()  SRCS(      common_ut.cpp @@ -18,8 +18,9 @@ SRCS(  PEERDIR(      contrib/libs/protobuf -    library/cpp/testing/unittest +    library/cpp/testing/gtest      yt/yt_proto/yt/formats +    yt/cpp/mapreduce/interface  )  END()  | 
