diff options
author | robot-piglet <[email protected]> | 2025-08-19 13:09:59 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-08-19 13:20:36 +0300 |
commit | a464bc70a160ce64c653b1b79969b59d7f65d4f9 (patch) | |
tree | 176eceab2e45e6e5e7658dc0a9502a2a73647572 | |
parent | f4d9ed29053a5d9bf21815d8e28da70a5bad3d32 (diff) |
Intermediate changes
commit_hash:b44cf19dc98b5f9b7fd8b258cd2a9fe9abe2765c
4 files changed, 130 insertions, 24 deletions
diff --git a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp index 6927c46240c..e194e4f22c9 100644 --- a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp +++ b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp @@ -15,6 +15,7 @@ namespace NYql { TProtoSchemaOptions::TProtoSchemaOptions() : EnumPolicy(EEnumPolicy::Int32) , ListIsOptional(false) + , EnableRecursiveRenaming(false) { } @@ -28,6 +29,11 @@ namespace NYql { return *this; } + TProtoSchemaOptions& TProtoSchemaOptions::SetEnableRecursiveRenaming(bool value) { + EnableRecursiveRenaming = value; + return *this; + } + TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames( THashMap<TString, TString> fieldRenames ) { @@ -122,7 +128,7 @@ namespace NYql { auto name = fieldDescriptor.name(); if ( auto renamePtr = options.FieldRenames.FindPtr(name); - nested.size() == 1 && renamePtr + (options.EnableRecursiveRenaming || nested.size() == 1) && renamePtr ) { name = *renamePtr; } diff --git a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h index 168c654ac78..bf85fc5e33e 100644 --- a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h +++ b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h @@ -29,6 +29,7 @@ namespace NYql { public: EEnumPolicy EnumPolicy; bool ListIsOptional; + bool EnableRecursiveRenaming; THashMap<TString, TString> FieldRenames; public: @@ -39,9 +40,9 @@ namespace NYql { TProtoSchemaOptions& SetListIsOptional(bool); - TProtoSchemaOptions& SetFieldRenames( - THashMap<TString, TString> fieldRenames - ); + TProtoSchemaOptions& SetEnableRecursiveRenaming(bool); + + TProtoSchemaOptions& SetFieldRenames(THashMap<TString, TString>); }; EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy); diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp b/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp index 923a4f5bd8f..5d479d2ef2c 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp @@ -109,6 +109,28 @@ private: bool Exhausted_ = false; }; +class TTSimpleNestedStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TSimpleNested*> { +public: + TTSimpleNestedStreamImpl() + { + Message_.SetX(100); + *Message_.MutableY() = GetCanonicalMessage(); + } + + NPureCalcProto::TSimpleNested* Fetch() override { + if (Exhausted_) { + return nullptr; + } else { + Exhausted_ = true; + return &Message_; + } + } + +private: + NPureCalcProto::TSimpleNested Message_; + bool Exhausted_ = false; +}; + class TAllTypesConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TAllTypes*> { private: int I_ = 0; @@ -993,4 +1015,59 @@ Y_UNIT_TEST_SUITE(TestProtoIO) { UNIT_ASSERT_VALUES_EQUAL(expected, actual); } } + + Y_UNIT_TEST(TestNestedFieldRenames) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + TString query = "SELECT InputAlias AS OutputAlias, X FROM Input"; + + auto inputProtoOptions = TProtoSchemaOptions(); + inputProtoOptions.SetFieldRenames({{"Y", "InputAlias"}, {"FInt64", "NestedField"}}); + inputProtoOptions.SetEnableRecursiveRenaming(true); + + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleNested>( + Nothing(), std::move(inputProtoOptions) + ); + + auto outputProtoOptions = TProtoSchemaOptions(); + outputProtoOptions.SetEnableRecursiveRenaming(true); + + outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}, {"FInt64", "NestedField"}}); + auto outputSpecWithNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( + outputProtoOptions + ); + + outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}}); + auto outputSpecWithoutNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( + std::move(outputProtoOptions) + ); + + { + auto program = factory->MakePullStreamProgram( + inputSpec, outputSpecWithNestedRename, query, ETranslationMode::SQL + ); + + auto input = MakeHolder<TTSimpleNestedStreamImpl>(); + auto output = program->Apply(std::move(input)); + + TVector<int> expected = {3}; + TVector<int> actual; + + while (auto* x = output->Fetch()) { + actual.push_back(x->GetY().GetFInt64()); + } + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } + + { + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram( + inputSpec, outputSpecWithoutNestedRename, query, ETranslationMode::SQL + ); + }(), TCompileError, "Failed to optimize"); + } + } } diff --git a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp index 0a3cc41427f..150ebfa80a8 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp @@ -177,29 +177,16 @@ namespace { TVector<TFieldMapping> NestedFields; }; - /** - * Fills a tree of field mappings from the given yql struct type to protobuf message. - * - * @param fromType source yql type. - * @param toType target protobuf message type. - * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match - * the order of field names. - */ - void FillFieldMappings( + void FillFieldMappingsImpl( const TStructType* fromType, const Descriptor& toType, TVector<TFieldMapping>& mappings, const TMaybe<TString>& timestampColumn, bool listIsOptional, - const THashMap<TString, TString>& fieldRenames + bool enableRecursiveRenaming, + const THashMap<TString, TString>& inverseFieldRenames ) { - THashMap<TString, TString> inverseFieldRenames; - - for (const auto& [source, target]: fieldRenames) { - auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source); - Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target); - } - + static const THashMap<TString, TString> emptyInverseFieldRenames; mappings.resize(fromType->GetMembersCount()); for (ui32 i = 0; i < fromType->GetMembersCount(); ++i) { TString fieldName(fromType->GetMemberName(i)); @@ -232,14 +219,46 @@ namespace { fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Data, "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]"); if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct) { - FillFieldMappings(static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType), - *mappings[i].Field->message_type(), - mappings[i].NestedFields, Nothing(), listIsOptional, {}); + FillFieldMappingsImpl( + static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType), + *mappings[i].Field->message_type(), + mappings[i].NestedFields, + Nothing(), + listIsOptional, + enableRecursiveRenaming, + enableRecursiveRenaming ? inverseFieldRenames : emptyInverseFieldRenames + ); } } } /** + * Fills a tree of field mappings from the given yql struct type to protobuf message. + * + * @param fromType source yql type. + * @param toType target protobuf message type. + * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match + * the order of field names. + */ + void FillFieldMappings( + const TStructType* fromType, + const Descriptor& toType, + TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + bool listIsOptional, + bool enableRecursiveRenaming, + const THashMap<TString, TString>& fieldRenames + ) { + THashMap<TString, TString> inverseFieldRenames; + for (const auto& [source, target]: fieldRenames) { + auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source); + Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target); + } + + FillFieldMappingsImpl(fromType, toType, mappings, timestampColumn, listIsOptional, enableRecursiveRenaming, inverseFieldRenames); + } + + /** * Extract field values from the given protobuf message into an array of unboxed values. * * @param factory to create nested unboxed values. @@ -624,6 +643,7 @@ namespace { Worker_->GetInputType(), inputSpec.GetDescriptor(), Mappings_, TimestampColumn_, inputSpec.GetSchemaOptions().ListIsOptional, + inputSpec.GetSchemaOptions().EnableRecursiveRenaming, inputSpec.GetSchemaOptions().FieldRenames ); } @@ -673,6 +693,7 @@ namespace { OutputColumns_, Nothing(), outputSpec.GetSchemaOptions().ListIsOptional, + outputSpec.GetSchemaOptions().EnableRecursiveRenaming, outputSpec.GetSchemaOptions().FieldRenames ); @@ -734,6 +755,7 @@ namespace { OutputColumns_.back(), Nothing(), outputSpec.GetSchemaOptions().ListIsOptional, + false, {} ); |