diff options
author | Grigory Reznikov <gritukan@nebius.com> | 2024-03-19 13:38:52 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-03-19 14:16:23 +0300 |
commit | 51c750cf25c79121b833e2a7882b10e8cd371bef (patch) | |
tree | c4e54afd7028d5f1b9859ecb54b287994cc6eee7 /yt/cpp/mapreduce/interface | |
parent | 7bfb3ae386d49f6392b1e4fcc8b6f1c43940f27c (diff) | |
download | ydb-51c750cf25c79121b833e2a7882b10e8cd371bef.tar.gz |
Support building yt/cpp and yt/yt/core with vanilla protobuf
After this PR yt/cpp and yt/yt/core are possible to be built both with Arcadia protobuf (that uses TString as a string) and vanilla protobuf (that uses std::string as a string). To achieve so, a couple of interoperability primitives are introduced.
* `TProtobufString` is an alias to protobuf string type, i.e. it can be `TString` or `std::string` depending on the protobuf implementation.
* `IsVanillaProtobuf` and `IsArcadiaProtobuf` are the constexpr boolean values that allow to check protobuf implementation both in the compile time and runtime.
The most challenging interoperability issue solved here is a string copy between protobuf message and C++ code that has a form of `TString str = msg.str()`. This code works perfect with Arcadia protobuf but does not work with vanilla protobuf. To solve it, a previously introduced primitive `FromProto<TString>` is used. This expression makes the most efficient cast possible between protobuf string and C++ string. Internally, it is just a copy in both cases. Since TString is CoW by default, this expression is almost zero-cost (actually it's just one atomic operation), so no degradation is expected for YTsaurus server builds. The most hot code is handled differently to avoid even atomic operations (see `GetRequestTargetYPath`). In case of vanilla protobuf string is copied, however there are no places in C++ SDK where it might be a problem. If such issues would appear, performance-critial code can be rewritten in `GetRequestTargetYPath`-style.
---
1a6f3e02cb6e83915102c24b73bc8734f6a48e74
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/466
Diffstat (limited to 'yt/cpp/mapreduce/interface')
-rw-r--r-- | yt/cpp/mapreduce/interface/format.cpp | 1 | ||||
-rw-r--r-- | yt/cpp/mapreduce/interface/protobuf_format.cpp | 30 | ||||
-rw-r--r-- | yt/cpp/mapreduce/interface/ya.make | 1 |
3 files changed, 17 insertions, 15 deletions
diff --git a/yt/cpp/mapreduce/interface/format.cpp b/yt/cpp/mapreduce/interface/format.cpp index aec9570bc2..e29cfd33f5 100644 --- a/yt/cpp/mapreduce/interface/format.cpp +++ b/yt/cpp/mapreduce/interface/format.cpp @@ -4,7 +4,6 @@ #include "errors.h" #include <google/protobuf/descriptor.h> -#include <google/protobuf/messagext.h> namespace NYT { diff --git a/yt/cpp/mapreduce/interface/protobuf_format.cpp b/yt/cpp/mapreduce/interface/protobuf_format.cpp index bdf7ed8c40..4962e85fc8 100644 --- a/yt/cpp/mapreduce/interface/protobuf_format.cpp +++ b/yt/cpp/mapreduce/interface/protobuf_format.cpp @@ -2,6 +2,8 @@ #include "errors.h" +#include <yt/yt/core/misc/protobuf_helpers.h> + #include <yt/yt_proto/yt/formats/extension.pb.h> #include <google/protobuf/text_format.h> @@ -531,15 +533,15 @@ TProtobufOneofOptions GetOneofOptions( auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name); switch (options.Mode) { case EProtobufOneofMode::SeparateFields: - if (variantFieldName) { + if (!variantFieldName.empty()) { ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT"; } break; case EProtobufOneofMode::Variant: - if (variantFieldName) { - options.VariantFieldName = variantFieldName; + if (variantFieldName.empty()) { + options.VariantFieldName = FromProto<TString>(oneofDescriptor->name()); } else { - options.VariantFieldName = oneofDescriptor->name(); + options.VariantFieldName = variantFieldName; } break; } @@ -598,15 +600,15 @@ TString DeduceProtobufType( TString GetColumnName(const ::google::protobuf::FieldDescriptor& field) { const auto& options = field.options(); - const auto columnName = options.GetExtension(column_name); + const auto columnName = FromProto<TString>(options.GetExtension(column_name)); if (!columnName.empty()) { return columnName; } - const auto keyColumnName = options.GetExtension(key_column_name); + const auto keyColumnName = FromProto<TString>(options.GetExtension(key_column_name)); if (!keyColumnName.empty()) { return keyColumnName; } - return field.name(); + return FromProto<TString>(field.name()); } TNode MakeProtoFormatMessageFieldsConfig( @@ -680,7 +682,7 @@ TNode MakeProtoFormatFieldConfig( if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) { auto* enumeration = fieldDescriptor->enum_type(); (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration); - fieldConfig["enumeration_name"] = enumeration->full_name(); + fieldConfig["enumeration_name"] = FromProto<TString>(enumeration->full_name()); } if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) { @@ -841,10 +843,10 @@ public: THashSet<TString> messageTypeNames; THashSet<TString> enumTypeNames; for (const auto* descriptor : AllDescriptors_) { - messageTypeNames.insert(descriptor->full_name()); + messageTypeNames.insert(FromProto<TString>(descriptor->full_name())); } for (const auto* enumDescriptor : EnumDescriptors_) { - enumTypeNames.insert(enumDescriptor->full_name()); + enumTypeNames.insert(FromProto<TString>(enumDescriptor->full_name())); } FileDescriptorSet fileDescriptorSetProto; for (const auto* file : fileTopoOrder) { @@ -954,9 +956,9 @@ private: const THashSet<TString>& messageTypeNames, const THashSet<TString>& enumTypeNames) { - const auto prefix = fileProto->package().Empty() + const auto prefix = fileProto->package().empty() ? "" - : fileProto->package() + '.'; + : FromProto<TString>(fileProto->package()) + '.'; RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) { return !messageTypeNames.contains(prefix + descriptorProto.name()); @@ -992,10 +994,10 @@ TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& des auto typeNames = TNode::CreateList(); for (const auto* descriptor : descriptors) { builder.AddDescriptor(descriptor); - typeNames.Add(descriptor->full_name()); + typeNames.Add(FromProto<TString>(descriptor->full_name())); } - auto fileDescriptorSetText = builder.Build().ShortDebugString(); + auto fileDescriptorSetText = FromProto<TString>(builder.Build().ShortDebugString()); TNode config("protobuf"); config.Attributes() ("file_descriptor_set_text", std::move(fileDescriptorSetText)) diff --git a/yt/cpp/mapreduce/interface/ya.make b/yt/cpp/mapreduce/interface/ya.make index 0e94f14633..1d76b1dabb 100644 --- a/yt/cpp/mapreduce/interface/ya.make +++ b/yt/cpp/mapreduce/interface/ya.make @@ -29,6 +29,7 @@ PEERDIR( yt/cpp/mapreduce/interface/logging yt/yt_proto/yt/formats yt/yt/library/tvm + yt/yt/core ) GENERATE_ENUM_SERIALIZATION(client_method_options.h) |