diff options
author | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
commit | fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch) | |
tree | b8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /yt/cpp/mapreduce/interface/operation_ut.cpp | |
parent | 7bf166b1a7ed0af927f230022b245af618e998c1 (diff) | |
download | ydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz |
YT-19324: move YT provider to ydb/library/yql
This commit is formed by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text.
This commit has zero effect on all projects that depend on YQL.
The summary of changes:
- `yql/providers/yt -> ydb/library/yql/providers/yt `- the whole implementation of YT provider is moved into YDB code base for further export as a part of YT YQL plugin shared library;
- `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without implementation and the description of stat expr nodes;
- `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`;
- `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`;
- `yql/core` is gone;
- `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`.
**NB**: all new targets inside `ydb/` are under `IF (NOT CMAKE_EXPORT)` clause which disables them from open-source cmake generation and ya make build. They will be enabled in the subsequent commits.
Diffstat (limited to 'yt/cpp/mapreduce/interface/operation_ut.cpp')
-rw-r--r-- | yt/cpp/mapreduce/interface/operation_ut.cpp | 269 |
1 files changed, 269 insertions, 0 deletions
diff --git a/yt/cpp/mapreduce/interface/operation_ut.cpp b/yt/cpp/mapreduce/interface/operation_ut.cpp new file mode 100644 index 0000000000..0fa62e1568 --- /dev/null +++ b/yt/cpp/mapreduce/interface/operation_ut.cpp @@ -0,0 +1,269 @@ +#include <yt/cpp/mapreduce/interface/common_ut.h> +#include <yt/cpp/mapreduce/interface/job_statistics.h> +#include <yt/cpp/mapreduce/interface/operation.h> +#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> + +#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> + +#include <library/cpp/yson/node/node_io.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NYT; +using namespace NYT::NUnitTesting; + +class TDummyInferenceContext + : public IOperationPreparationContext +{ +public: + TDummyInferenceContext(int inputCount, int outputCount) + : InputCount_(inputCount) + , OutputCount_(outputCount) + , InputSchemas_(inputCount) + { } + + int GetInputCount() const override + { + return InputCount_; + } + + int GetOutputCount() const override + { + return OutputCount_; + } + + const TVector<TTableSchema>& GetInputSchemas() const override + { + return InputSchemas_; + } + + const TTableSchema& GetInputSchema(int index) const override + { + return InputSchemas_[index]; + } + + TMaybe<TYPath> GetInputPath(int) const override + { + return Nothing(); + } + + TMaybe<TYPath> GetOutputPath(int) const override + { + return Nothing(); + } + +private: + int InputCount_; + int OutputCount_; + TVector<TTableSchema> InputSchemas_; +}; + +Y_UNIT_TEST_SUITE(PrepareOperation) +{ + + Y_UNIT_TEST(BasicSchemas) + { + auto firstSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); + auto otherSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); + auto thirdSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); + + TDummyInferenceContext context(3,7); + TJobOperationPreparer builder(context); + + builder + .OutputSchema(1, firstSchema) + .BeginOutputGroup(TVector<int>{2, 5}) + .Schema(otherSchema) + .EndOutputGroup() + .BeginOutputGroup(3, 5) + .Schema(thirdSchema) + .EndOutputGroup() + .BeginOutputGroup(TVector<int>{0, 6}) + .Schema(thirdSchema) + .EndOutputGroup(); + + UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, otherSchema), TApiUsageError); + UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(3, 5).Schema(otherSchema), TApiUsageError); + UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(TVector<int>{3,6,7}).Schema(otherSchema), TApiUsageError); + + builder.Finish(); + auto result = builder.GetOutputSchemas(); + + ASSERT_SERIALIZABLES_EQUAL(result[0], thirdSchema); + ASSERT_SERIALIZABLES_EQUAL(result[1], firstSchema); + ASSERT_SERIALIZABLES_EQUAL(result[2], otherSchema); + ASSERT_SERIALIZABLES_EQUAL(result[3], thirdSchema); + ASSERT_SERIALIZABLES_EQUAL(result[4], thirdSchema); + ASSERT_SERIALIZABLES_EQUAL(result[5], otherSchema); + ASSERT_SERIALIZABLES_EQUAL(result[6], thirdSchema); + } + + Y_UNIT_TEST(NoSchema) + { + auto schema = TTableSchema() + .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); + + TDummyInferenceContext context(3,4); + TJobOperationPreparer builder(context); + + builder + .OutputSchema(1, schema) + .NoOutputSchema(0) + .BeginOutputGroup(2, 4) + .Schema(schema) + .EndOutputGroup(); + + UNIT_ASSERT_EXCEPTION(builder.OutputSchema(0, schema), TApiUsageError); + + builder.Finish(); + auto result = builder.GetOutputSchemas(); + + UNIT_ASSERT(result[0].Empty()); + + ASSERT_SERIALIZABLES_EQUAL(result[1], schema); + ASSERT_SERIALIZABLES_EQUAL(result[2], schema); + ASSERT_SERIALIZABLES_EQUAL(result[3], schema); + } + + Y_UNIT_TEST(Descriptions) + { + auto urlRowSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String()))) + .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String()))) + .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32()))); + + auto urlRowStruct = NTi::Struct({ + {"Host", NTi::Optional(NTi::String())}, + {"Path", NTi::Optional(NTi::String())}, + {"HttpCode", NTi::Optional(NTi::Int32())}, + }); + + auto rowFieldSerializationOptionSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct))) + .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String()))); + + auto rowSerializedRepeatedFieldsSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64()))) + .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct))); + + TDummyInferenceContext context(5,7); + TJobOperationPreparer builder(context); + + builder + .InputDescription<TUrlRow>(0) + .BeginInputGroup(2, 3) + .Description<TUrlRow>() + .EndInputGroup() + .BeginInputGroup(TVector<int>{1, 4}) + .Description<TRowSerializedRepeatedFields>() + .EndInputGroup() + .InputDescription<TUrlRow>(3); + + UNIT_ASSERT_EXCEPTION(builder.InputDescription<TUrlRow>(0), TApiUsageError); + + builder + .OutputDescription<TUrlRow>(0, false) + .OutputDescription<TRowFieldSerializationOption>(1) + .BeginOutputGroup(2, 4) + .Description<TUrlRow>() + .EndOutputGroup() + .BeginOutputGroup(TVector<int>{4,6}) + .Description<TRowSerializedRepeatedFields>() + .EndOutputGroup() + .OutputDescription<TUrlRow>(5, false); + + UNIT_ASSERT_EXCEPTION(builder.OutputDescription<TUrlRow>(0), TApiUsageError); + UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(0, urlRowSchema)); + UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(5, urlRowSchema)); + UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, urlRowSchema), TApiUsageError); + + builder.Finish(); + auto result = builder.GetOutputSchemas(); + + ASSERT_SERIALIZABLES_EQUAL(result[0], urlRowSchema); + ASSERT_SERIALIZABLES_EQUAL(result[1], rowFieldSerializationOptionSchema); + ASSERT_SERIALIZABLES_EQUAL(result[2], urlRowSchema); + ASSERT_SERIALIZABLES_EQUAL(result[3], urlRowSchema); + ASSERT_SERIALIZABLES_EQUAL(result[4], rowSerializedRepeatedFieldsSchema); + ASSERT_SERIALIZABLES_EQUAL(result[5], urlRowSchema); + ASSERT_SERIALIZABLES_EQUAL(result[6], rowSerializedRepeatedFieldsSchema); + + auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{ + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, + }; + UNIT_ASSERT_EQUAL(expectedInputDescriptions, builder.GetInputDescriptions()); + + auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{ + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}}, + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, + {TProtobufTableStructure{TUrlRow::descriptor()}}, + {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, + }; + UNIT_ASSERT_EQUAL(expectedOutputDescriptions, builder.GetOutputDescriptions()); + } + + Y_UNIT_TEST(InputColumns) + { + TDummyInferenceContext context(5, 1); + TJobOperationPreparer builder(context); + builder + .InputColumnFilter(2, {"a", "b"}) + .BeginInputGroup(0, 2) + .ColumnFilter({"b", "c"}) + .ColumnRenaming({{"b", "B"}, {"c", "C"}}) + .EndInputGroup() + .InputColumnRenaming(3, {{"a", "AAA"}}) + .NoOutputSchema(0); + builder.Finish(); + + auto expectedRenamings = TVector<THashMap<TString, TString>>{ + {{"b", "B"}, {"c", "C"}}, + {{"b", "B"}, {"c", "C"}}, + {}, + {{"a", "AAA"}}, + {}, + }; + UNIT_ASSERT_EQUAL(builder.GetInputColumnRenamings(), expectedRenamings); + + auto expectedFilters = TVector<TMaybe<TVector<TString>>>{ + {{"b", "c"}}, + {{"b", "c"}}, + {{"a", "b"}}, + {}, + {}, + }; + UNIT_ASSERT_EQUAL(builder.GetInputColumnFilters(), expectedFilters); + } + + Y_UNIT_TEST(Bug_r7349102) + { + auto firstSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); + auto otherSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); + auto thirdSchema = TTableSchema() + .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); + + TDummyInferenceContext context(3,1); + TJobOperationPreparer builder(context); + + builder + .InputDescription<TUrlRow>(0) + .InputDescription<TUrlRow>(1) + .InputDescription<TUrlRow>(2) + .OutputDescription<TUrlRow>(0); + + builder.Finish(); + } + +} // Y_UNIT_TEST_SUITE(SchemaInference) |