aboutsummaryrefslogtreecommitdiffstats
path: root/yt/cpp/mapreduce/interface
diff options
context:
space:
mode:
authorGrigory Reznikov <gritukan@nebius.com>2024-03-19 13:38:52 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-03-19 14:16:23 +0300
commit51c750cf25c79121b833e2a7882b10e8cd371bef (patch)
treec4e54afd7028d5f1b9859ecb54b287994cc6eee7 /yt/cpp/mapreduce/interface
parent7bfb3ae386d49f6392b1e4fcc8b6f1c43940f27c (diff)
downloadydb-51c750cf25c79121b833e2a7882b10e8cd371bef.tar.gz
Support building yt/cpp and yt/yt/core with vanilla protobuf
After this PR yt/cpp and yt/yt/core are possible to be built both with Arcadia protobuf (that uses TString as a string) and vanilla protobuf (that uses std::string as a string). To achieve so, a couple of interoperability primitives are introduced. * `TProtobufString` is an alias to protobuf string type, i.e. it can be `TString` or `std::string` depending on the protobuf implementation. * `IsVanillaProtobuf` and `IsArcadiaProtobuf` are the constexpr boolean values that allow to check protobuf implementation both in the compile time and runtime. The most challenging interoperability issue solved here is a string copy between protobuf message and C++ code that has a form of `TString str = msg.str()`. This code works perfect with Arcadia protobuf but does not work with vanilla protobuf. To solve it, a previously introduced primitive `FromProto<TString>` is used. This expression makes the most efficient cast possible between protobuf string and C++ string. Internally, it is just a copy in both cases. Since TString is CoW by default, this expression is almost zero-cost (actually it's just one atomic operation), so no degradation is expected for YTsaurus server builds. The most hot code is handled differently to avoid even atomic operations (see `GetRequestTargetYPath`). In case of vanilla protobuf string is copied, however there are no places in C++ SDK where it might be a problem. If such issues would appear, performance-critial code can be rewritten in `GetRequestTargetYPath`-style. --- 1a6f3e02cb6e83915102c24b73bc8734f6a48e74 Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/466
Diffstat (limited to 'yt/cpp/mapreduce/interface')
-rw-r--r--yt/cpp/mapreduce/interface/format.cpp1
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_format.cpp30
-rw-r--r--yt/cpp/mapreduce/interface/ya.make1
3 files changed, 17 insertions, 15 deletions
diff --git a/yt/cpp/mapreduce/interface/format.cpp b/yt/cpp/mapreduce/interface/format.cpp
index aec9570bc2..e29cfd33f5 100644
--- a/yt/cpp/mapreduce/interface/format.cpp
+++ b/yt/cpp/mapreduce/interface/format.cpp
@@ -4,7 +4,6 @@
#include "errors.h"
#include <google/protobuf/descriptor.h>
-#include <google/protobuf/messagext.h>
namespace NYT {
diff --git a/yt/cpp/mapreduce/interface/protobuf_format.cpp b/yt/cpp/mapreduce/interface/protobuf_format.cpp
index bdf7ed8c40..4962e85fc8 100644
--- a/yt/cpp/mapreduce/interface/protobuf_format.cpp
+++ b/yt/cpp/mapreduce/interface/protobuf_format.cpp
@@ -2,6 +2,8 @@
#include "errors.h"
+#include <yt/yt/core/misc/protobuf_helpers.h>
+
#include <yt/yt_proto/yt/formats/extension.pb.h>
#include <google/protobuf/text_format.h>
@@ -531,15 +533,15 @@ TProtobufOneofOptions GetOneofOptions(
auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name);
switch (options.Mode) {
case EProtobufOneofMode::SeparateFields:
- if (variantFieldName) {
+ if (!variantFieldName.empty()) {
ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT";
}
break;
case EProtobufOneofMode::Variant:
- if (variantFieldName) {
- options.VariantFieldName = variantFieldName;
+ if (variantFieldName.empty()) {
+ options.VariantFieldName = FromProto<TString>(oneofDescriptor->name());
} else {
- options.VariantFieldName = oneofDescriptor->name();
+ options.VariantFieldName = variantFieldName;
}
break;
}
@@ -598,15 +600,15 @@ TString DeduceProtobufType(
TString GetColumnName(const ::google::protobuf::FieldDescriptor& field)
{
const auto& options = field.options();
- const auto columnName = options.GetExtension(column_name);
+ const auto columnName = FromProto<TString>(options.GetExtension(column_name));
if (!columnName.empty()) {
return columnName;
}
- const auto keyColumnName = options.GetExtension(key_column_name);
+ const auto keyColumnName = FromProto<TString>(options.GetExtension(key_column_name));
if (!keyColumnName.empty()) {
return keyColumnName;
}
- return field.name();
+ return FromProto<TString>(field.name());
}
TNode MakeProtoFormatMessageFieldsConfig(
@@ -680,7 +682,7 @@ TNode MakeProtoFormatFieldConfig(
if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) {
auto* enumeration = fieldDescriptor->enum_type();
(*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration);
- fieldConfig["enumeration_name"] = enumeration->full_name();
+ fieldConfig["enumeration_name"] = FromProto<TString>(enumeration->full_name());
}
if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) {
@@ -841,10 +843,10 @@ public:
THashSet<TString> messageTypeNames;
THashSet<TString> enumTypeNames;
for (const auto* descriptor : AllDescriptors_) {
- messageTypeNames.insert(descriptor->full_name());
+ messageTypeNames.insert(FromProto<TString>(descriptor->full_name()));
}
for (const auto* enumDescriptor : EnumDescriptors_) {
- enumTypeNames.insert(enumDescriptor->full_name());
+ enumTypeNames.insert(FromProto<TString>(enumDescriptor->full_name()));
}
FileDescriptorSet fileDescriptorSetProto;
for (const auto* file : fileTopoOrder) {
@@ -954,9 +956,9 @@ private:
const THashSet<TString>& messageTypeNames,
const THashSet<TString>& enumTypeNames)
{
- const auto prefix = fileProto->package().Empty()
+ const auto prefix = fileProto->package().empty()
? ""
- : fileProto->package() + '.';
+ : FromProto<TString>(fileProto->package()) + '.';
RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) {
return !messageTypeNames.contains(prefix + descriptorProto.name());
@@ -992,10 +994,10 @@ TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& des
auto typeNames = TNode::CreateList();
for (const auto* descriptor : descriptors) {
builder.AddDescriptor(descriptor);
- typeNames.Add(descriptor->full_name());
+ typeNames.Add(FromProto<TString>(descriptor->full_name()));
}
- auto fileDescriptorSetText = builder.Build().ShortDebugString();
+ auto fileDescriptorSetText = FromProto<TString>(builder.Build().ShortDebugString());
TNode config("protobuf");
config.Attributes()
("file_descriptor_set_text", std::move(fileDescriptorSetText))
diff --git a/yt/cpp/mapreduce/interface/ya.make b/yt/cpp/mapreduce/interface/ya.make
index 0e94f14633..1d76b1dabb 100644
--- a/yt/cpp/mapreduce/interface/ya.make
+++ b/yt/cpp/mapreduce/interface/ya.make
@@ -29,6 +29,7 @@ PEERDIR(
yt/cpp/mapreduce/interface/logging
yt/yt_proto/yt/formats
yt/yt/library/tvm
+ yt/yt/core
)
GENERATE_ENUM_SERIALIZATION(client_method_options.h)