diff options
author | alexv-smirnov <alex@ydb.tech> | 2023-09-19 12:14:42 +0300 |
---|---|---|
committer | alexv-smirnov <alex@ydb.tech> | 2023-09-19 12:43:02 +0300 |
commit | 4662db373a1785a836134a4b2342e8471342bdda (patch) | |
tree | 2df6840dc71039dbcb4192a4b6b2edbf5f26248d /library/cpp | |
parent | 0364421e62126bcf971c82084d3c923be85c77c7 (diff) | |
download | ydb-4662db373a1785a836134a4b2342e8471342bdda.tar.gz |
Clone protobuf_udf to ydb/library
Diffstat (limited to 'library/cpp')
24 files changed, 930 insertions, 0 deletions
diff --git a/library/cpp/protobuf/CMakeLists.txt b/library/cpp/protobuf/CMakeLists.txt index cfd4a1733f..ab3580519b 100644 --- a/library/cpp/protobuf/CMakeLists.txt +++ b/library/cpp/protobuf/CMakeLists.txt @@ -6,6 +6,8 @@ # original buildsystem will not be accepted. +add_subdirectory(dynamic_prototype) add_subdirectory(interop) add_subdirectory(json) add_subdirectory(util) +add_subdirectory(yql) diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..f17724c5b1 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-dynamic_prototype) +target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(cpp-protobuf-dynamic_prototype PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp +) diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..1d956f9a86 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-dynamic_prototype) +target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(cpp-protobuf-dynamic_prototype PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp +) diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..1d956f9a86 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-dynamic_prototype) +target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(cpp-protobuf-dynamic_prototype PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp +) diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..f17724c5b1 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-dynamic_prototype) +target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(cpp-protobuf-dynamic_prototype PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp +) diff --git a/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp new file mode 100644 index 0000000000..692331fbbe --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp @@ -0,0 +1,63 @@ +#include "dynamic_prototype.h" + +#include <util/generic/yexception.h> + +TDynamicPrototype::TDynamicMessage::TDynamicMessage(THolder<NProtoBuf::Message> message, TIntrusivePtr<TDynamicPrototype> prototype) + : Prototype(std::move(prototype)) + , Message(std::move(message)) +{ +} + +NProtoBuf::Message& TDynamicPrototype::TDynamicMessage::operator*() { + return *Message; +} + +NProtoBuf::Message* TDynamicPrototype::TDynamicMessage::operator->() { + return Get(); +} + +NProtoBuf::Message* TDynamicPrototype::TDynamicMessage::Get() { + return Message.Get(); +} + +TDynamicPrototypePtr TDynamicPrototype::Create(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack) { + return new TDynamicPrototype(fds, messageName, yqlHack); +} + +TDynamicPrototype::TDynamicPrototype(const NProtoBuf::FileDescriptorSet& fileDescriptorSet, const TString& messageName, bool yqlHack) +{ + const NProtoBuf::FileDescriptor* fileDescriptor; + + for (int i = 0; i < fileDescriptorSet.file_size(); ++i) { + fileDescriptor = Pool.BuildFile(fileDescriptorSet.file(i)); + if (fileDescriptor == nullptr) { + ythrow yexception() << "can't build file descriptor"; + } + } + + Descriptor = Pool.FindMessageTypeByName(messageName); + + // Первоначальный вариант поведения, когда тип определялся + // по имени сообщения верхнего уровня в заданном файле. + if (yqlHack && !Descriptor) { + Descriptor = fileDescriptor->FindMessageTypeByName(messageName); + } + + if (!Descriptor) { + ythrow yexception() << "no descriptor for " << messageName; + } + + Prototype = Factory.GetPrototype(Descriptor); +} + +TDynamicPrototype::TDynamicMessage TDynamicPrototype::Create() { + return TDynamicMessage(CreateUnsafe(), this); +} + +THolder<NProtoBuf::Message> TDynamicPrototype::CreateUnsafe() const { + return THolder<NProtoBuf::Message>(Prototype->New()); +} + +const NProtoBuf::Descriptor* TDynamicPrototype::GetDescriptor() const { + return Descriptor; +} diff --git a/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h new file mode 100644 index 0000000000..f2afcd5f86 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h @@ -0,0 +1,43 @@ +#pragma once + +#include <google/protobuf/dynamic_message.h> +#include <google/protobuf/descriptor.pb.h> + +#include <util/generic/ptr.h> + +class TDynamicPrototype; + +using TDynamicPrototypePtr = TIntrusivePtr<TDynamicPrototype>; + +class TDynamicPrototype : public TThrRefBase { +public: + class TDynamicMessage { + public: + TDynamicMessage() = default; + TDynamicMessage(THolder<NProtoBuf::Message> message, TIntrusivePtr<TDynamicPrototype> prototype); + NProtoBuf::Message& operator*(); + NProtoBuf::Message* operator->(); + NProtoBuf::Message* Get(); + private: + TIntrusivePtr<TDynamicPrototype> Prototype; + THolder<NProtoBuf::Message> Message; + }; + + static TDynamicPrototypePtr Create(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack = false); + + // https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.dynamic_message#DynamicMessageFactory + // Must outlive created messages + TDynamicMessage Create(); + + THolder<NProtoBuf::Message> CreateUnsafe() const; + + const NProtoBuf::Descriptor* GetDescriptor() const; + +private: + TDynamicPrototype(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack); + + NProtoBuf::DescriptorPool Pool; + const NProtoBuf::Descriptor* Descriptor; + NProtoBuf::DynamicMessageFactory Factory; + const NProtoBuf::Message* Prototype; +}; diff --git a/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp new file mode 100644 index 0000000000..b3fc68e545 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp @@ -0,0 +1,33 @@ +#include "generate_file_descriptor_set.h" + +#include <util/generic/queue.h> +#include <util/generic/set.h> +#include <util/generic/vector.h> + +using namespace NProtoBuf; + +FileDescriptorSet GenerateFileDescriptorSet(const Descriptor* desc) { + TVector<const FileDescriptor*> flat; + TQueue<const FileDescriptor*> descriptors; + + for (descriptors.push(desc->file()); !descriptors.empty(); descriptors.pop()) { + const FileDescriptor* file = descriptors.front(); + + for (int i = 0; i < file->dependency_count(); ++i) { + descriptors.push(file->dependency(i)); + } + + flat.push_back(file); + } + + FileDescriptorSet result; + TSet<TString> visited; + + for (auto ri = flat.rbegin(); ri != flat.rend(); ++ri) { + if (visited.insert((*ri)->name()).second == true) { + (*ri)->CopyTo(result.add_file()); + } + } + + return result; +} diff --git a/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h new file mode 100644 index 0000000000..8e1fb33a08 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h @@ -0,0 +1,5 @@ +#pragma once + +#include <google/protobuf/descriptor.pb.h> + +NProtoBuf::FileDescriptorSet GenerateFileDescriptorSet(const NProtoBuf::Descriptor* desc); diff --git a/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp b/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp new file mode 100644 index 0000000000..bf3bfca4aa --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp @@ -0,0 +1,27 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h> +#include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h> +#include <library/cpp/protobuf/dynamic_prototype/ut/my_message.pb.h> + + +Y_UNIT_TEST_SUITE(TDynamicPrototype) { + Y_UNIT_TEST(Create) { + const auto* descriptor = TMyMessage().GetDescriptor(); + const auto& fileDescriptorSet = GenerateFileDescriptorSet(descriptor); + + auto prototype = TDynamicPrototype::Create(fileDescriptorSet, "TMyMessage"); + auto myMessage = prototype->Create(); + + TMyMessage reference; + reference.SetFloat(12.1); + reference.MutableInnerMessage()->SetInt32(42); + reference.MutableInnerMessage()->SetString("string"); + + TString serialized; + Y_PROTOBUF_SUPPRESS_NODISCARD reference.SerializeToString(&serialized); + Y_PROTOBUF_SUPPRESS_NODISCARD myMessage->ParseFromString(serialized); + + UNIT_ASSERT_STRINGS_EQUAL(reference.DebugString(), myMessage->DebugString()); + } +} diff --git a/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto b/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto new file mode 100644 index 0000000000..d893d7b4d9 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto @@ -0,0 +1,4 @@ +message TMyInnerMessage { + required int32 Int32 = 1; + required string String = 2; +} diff --git a/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto b/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto new file mode 100644 index 0000000000..c548985530 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto @@ -0,0 +1,6 @@ +import "library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto"; + +message TMyMessage { + required TMyInnerMessage InnerMessage = 1; + required float Float = 2; +}
\ No newline at end of file diff --git a/library/cpp/protobuf/dynamic_prototype/ut/ya.make b/library/cpp/protobuf/dynamic_prototype/ut/ya.make new file mode 100644 index 0000000000..1bb570f959 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/ut/ya.make @@ -0,0 +1,13 @@ +UNITTEST() + +SRCS( + dynamic_prototype_ut.cpp + my_inner_message.proto + my_message.proto +) + +PEERDIR( + library/cpp/protobuf/dynamic_prototype +) + +END() diff --git a/library/cpp/protobuf/dynamic_prototype/ya.make b/library/cpp/protobuf/dynamic_prototype/ya.make new file mode 100644 index 0000000000..3fd883b6a2 --- /dev/null +++ b/library/cpp/protobuf/dynamic_prototype/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + dynamic_prototype.cpp + generate_file_descriptor_set.cpp +) + +PEERDIR( + contrib/libs/protobuf +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..112f6e94e7 --- /dev/null +++ b/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-yql) +target_link_libraries(cpp-protobuf-yql PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + library-cpp-json + cpp-protobuf-dynamic_prototype + cpp-protobuf-json + cpp-string_utils-base64 +) +target_sources(cpp-protobuf-yql PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp +) diff --git a/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt b/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..de09662231 --- /dev/null +++ b/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-yql) +target_link_libraries(cpp-protobuf-yql PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + library-cpp-json + cpp-protobuf-dynamic_prototype + cpp-protobuf-json + cpp-string_utils-base64 +) +target_sources(cpp-protobuf-yql PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp +) diff --git a/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..de09662231 --- /dev/null +++ b/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-yql) +target_link_libraries(cpp-protobuf-yql PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + library-cpp-json + cpp-protobuf-dynamic_prototype + cpp-protobuf-json + cpp-string_utils-base64 +) +target_sources(cpp-protobuf-yql PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp +) diff --git a/library/cpp/protobuf/yql/CMakeLists.txt b/library/cpp/protobuf/yql/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/protobuf/yql/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..112f6e94e7 --- /dev/null +++ b/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-protobuf-yql) +target_link_libraries(cpp-protobuf-yql PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + library-cpp-json + cpp-protobuf-dynamic_prototype + cpp-protobuf-json + cpp-string_utils-base64 +) +target_sources(cpp-protobuf-yql PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp +) diff --git a/library/cpp/protobuf/yql/README.md b/library/cpp/protobuf/yql/README.md new file mode 100644 index 0000000000..622bfce841 --- /dev/null +++ b/library/cpp/protobuf/yql/README.md @@ -0,0 +1,26 @@ +Функции данной библиотеки, позволяют генерировать метаинформацию, необходимую YQL для того, чтобы работать с содержимым protobuf-сообщения как со структурой. + +Пример задания атрибута для таблицы в YT: +```c++ +#include <proto/person.pb.h> +#include <library/cpp/protobuf/yql/descriptor.h> + +auto client = NYT::CreateClient(ServerName); +client->Create(OutputPath, NT_TABLE, + TCreateOptions().Attributes(NYT::TNode() + ("_yql_proto_field_Data", GenerateProtobufTypeConfig<TPersonProto>()) +); +``` +Далее, в YQL можно будет написать следующий запрос: +```sql +SELECT t.Data.Name, t.Data.Age, FROM [table] AS t; +``` + +Для того, чтобы работать с protobuf-сообщениями, сохранёнными в разных представлениях есть специальные опции. На данный момент поддерживается три формата представления данных - binary-protobuf, text-protbuf и json. + +Пример использования опций: +```c++ +GenerateProtobufTypeConfig<TPersonProto>( + TProtoTypeConfigOptions().SetProtoFormat(PF_PROTOTEXT)) +); +``` diff --git a/library/cpp/protobuf/yql/descriptor.cpp b/library/cpp/protobuf/yql/descriptor.cpp new file mode 100644 index 0000000000..2d5a154fc1 --- /dev/null +++ b/library/cpp/protobuf/yql/descriptor.cpp @@ -0,0 +1,314 @@ +#include "descriptor.h" + +#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_writer.h> +#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h> +#include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h> +#include <library/cpp/protobuf/json/json2proto.h> +#include <library/cpp/protobuf/json/proto2json.h> +#include <library/cpp/string_utils/base64/base64.h> + +#include <util/generic/hash.h> +#include <util/generic/queue.h> +#include <util/generic/set.h> +#include <util/generic/vector.h> +#include <util/stream/mem.h> +#include <util/stream/str.h> +#include <util/stream/zlib.h> +#include <util/string/cast.h> + +#include <google/protobuf/text_format.h> +#include <google/protobuf/io/zero_copy_stream_impl_lite.h> + +using namespace NProtoBuf; + +static TString SerializeFileDescriptorSet(const FileDescriptorSet& proto) { + const auto size = proto.ByteSize(); + TTempBuf data(size); + proto.SerializeWithCachedSizesToArray((ui8*)data.Data()); + + TStringStream str; + { + TZLibCompress comp(&str, ZLib::GZip); + comp.Write(data.Data(), size); + } + return str.Str(); +} + +static bool ParseFileDescriptorSet(const TStringBuf& data, FileDescriptorSet* proto) { + TMemoryInput input(data.data(), data.size()); + TString buf = TZLibDecompress(&input).ReadAll(); + + if (!proto->ParseFromArray(buf.data(), buf.size())) { + return false; + } + return true; +} + +TDynamicInfo::TDynamicInfo(TDynamicPrototypePtr dynamicPrototype) + : DynamicPrototype(dynamicPrototype) + , SkipBytes_(0) +{ +} + +TDynamicInfo::~TDynamicInfo() { +} + +TDynamicInfoRef TDynamicInfo::Create(const TStringBuf& typeConfig) { + auto data = ParseTypeConfig(typeConfig); + const TString& meta = Base64Decode(data.Metadata); + const TString& name = data.MessageName; + FileDescriptorSet set; + + if (!ParseFileDescriptorSet(meta, &set)) { + ythrow yexception() << "can't parse metadata"; + } + + auto info = MakeIntrusive<TDynamicInfo>(TDynamicPrototype::Create(set, name, true)); + + info->EnumFormat_ = data.EnumFormat; + info->ProtoFormat_ = data.ProtoFormat; + info->Recursion_ = data.Recursion; + info->YtMode_ = data.YtMode; + info->SkipBytes_ = data.SkipBytes; + info->OptionalLists_ = data.OptionalLists; + info->SyntaxAware_ = data.SyntaxAware; + return info; +} + +const Descriptor* TDynamicInfo::Descriptor() const { + return DynamicPrototype->GetDescriptor(); +} + +EEnumFormat TDynamicInfo::GetEnumFormat() const { + return EnumFormat_; +} + +ERecursionTraits TDynamicInfo::GetRecursionTraits() const { + return Recursion_; +} + +bool TDynamicInfo::GetYtMode() const { + return YtMode_; +} + +bool TDynamicInfo::GetOptionalLists() const { + return OptionalLists_; +} + +bool TDynamicInfo::GetSyntaxAware() const { + return SyntaxAware_; +} + +TAutoPtr<Message> TDynamicInfo::MakeProto() { + return DynamicPrototype->CreateUnsafe(); +} + +TAutoPtr<Message> TDynamicInfo::Parse(const TStringBuf& data) { + auto mut = MakeProto(); + TStringBuf tmp(data); + + if (SkipBytes_) { + tmp = TStringBuf(tmp.data() + SkipBytes_, tmp.size() - SkipBytes_); + } + + switch (ProtoFormat_) { + case PF_PROTOBIN: { + if (!mut->ParseFromArray(tmp.data(), tmp.size())) { + ythrow yexception() << "can't parse protobin message"; + } + break; + } + case PF_PROTOTEXT: { + io::ArrayInputStream si(tmp.data(), tmp.size()); + if (!TextFormat::Parse(&si, mut.Get())) { + ythrow yexception() << "can't parse prototext message"; + } + break; + } + case PF_JSON: { + NJson::TJsonValue value; + + if (NJson::ReadJsonFastTree(tmp, &value)) { + NProtobufJson::Json2Proto(value, *mut); + } else { + ythrow yexception() << "can't parse json value"; + } + break; + } + } + + return mut; +} + +TString TDynamicInfo::Serialize(const Message& proto) { + TString result; + switch (ProtoFormat_) { + case PF_PROTOBIN: { + result.ReserveAndResize(proto.ByteSize()); + if (!proto.SerializeToArray(result.begin(), result.size())) { + ythrow yexception() << "can't serialize protobin message"; + } + break; + } + case PF_PROTOTEXT: { + if (!TextFormat::PrintToString(proto, &result)) { + ythrow yexception() << "can't serialize prototext message"; + } + break; + } + case PF_JSON: { + NJson::TJsonValue value; + NProtobufJson::Proto2Json(proto, value); + result = NJson::WriteJson(value); + break; + } + } + return result; +} + +TString GenerateProtobufTypeConfig( + const Descriptor* descriptor, + const TProtoTypeConfigOptions& options) { + NJson::TJsonValue ret(NJson::JSON_MAP); + + ret["name"] = descriptor->full_name(); + ret["meta"] = Base64Encode( + SerializeFileDescriptorSet(GenerateFileDescriptorSet(descriptor))); + + if (options.SkipBytes > 0) { + ret["skip"] = options.SkipBytes; + } + + switch (options.ProtoFormat) { + case PF_PROTOBIN: + break; + case PF_PROTOTEXT: + ret["format"] = "prototext"; + break; + case PF_JSON: + ret["format"] = "json"; + break; + } + + if (!options.OptionalLists) { + ret["lists"]["optional"] = false; + } + + if (options.SyntaxAware) { + ret["syntax"]["aware"] = options.SyntaxAware; + } + + switch (options.EnumFormat) { + case EEnumFormat::Number: + break; + case EEnumFormat::Name: + ret["view"]["enum"] = "name"; + break; + case EEnumFormat::FullName: + ret["view"]["enum"] = "full_name"; + break; + } + + switch (options.Recursion) { + case ERecursionTraits::Fail: + break; + case ERecursionTraits::Ignore: + ret["view"]["recursion"] = "ignore"; + break; + case ERecursionTraits::Bytes: + ret["view"]["recursion"] = "bytes"; + break; + } + + if (options.YtMode) { + ret["view"]["yt_mode"] = true; + } + + return NJson::WriteJson(ret, false); +} + +TProtoTypeConfig ParseTypeConfig(const TStringBuf& config) { + if (config.empty()) { + ythrow yexception() << "empty metadata"; + } + + switch (config[0]) { + case '#': { + auto plus = config.find('+'); + + if (config[0] != '#') { + ythrow yexception() << "unknown version of metadata format"; + } + if (plus == TStringBuf::npos) { + ythrow yexception() << "invalid metadata"; + } + + TProtoTypeConfig result; + + result.MessageName = TStringBuf(config.begin() + 1, plus - 1); + result.Metadata = TStringBuf(config.begin() + 1 + plus, config.size() - plus - 1); + result.SkipBytes = 0; + + return result; + } + + case '{': { + NJson::TJsonValue value; + + if (NJson::ReadJsonFastTree(config, &value)) { + TProtoTypeConfig result; + TString protoFormat = value["format"].GetStringSafe("protobin"); + TString enumFormat = value["view"]["enum"].GetStringSafe("number"); + TString recursion = value["view"]["recursion"].GetStringSafe("fail"); + + result.MessageName = value["name"].GetString(); + result.Metadata = value["meta"].GetString(); + result.SkipBytes = value["skip"].GetIntegerSafe(0); + result.OptionalLists = value["lists"]["optional"].GetBooleanSafe(true); + result.SyntaxAware = value["syntax"]["aware"].GetBooleanSafe(false); + result.YtMode = value["view"]["yt_mode"].GetBooleanSafe(false); + + if (protoFormat == "protobin") { + result.ProtoFormat = PF_PROTOBIN; + } else if (protoFormat == "prototext") { + result.ProtoFormat = PF_PROTOTEXT; + } else if (protoFormat == "json") { + result.ProtoFormat = PF_JSON; + } else { + ythrow yexception() << "unsupported format " << protoFormat; + } + + if (enumFormat == "number") { + result.EnumFormat = EEnumFormat::Number; + } else if (enumFormat == "name") { + result.EnumFormat = EEnumFormat::Name; + } else if (enumFormat == "full_name") { + result.EnumFormat = EEnumFormat::FullName; + } else { + ythrow yexception() << "unsupported enum representation " + << enumFormat; + } + + if (recursion == "fail") { + result.Recursion = ERecursionTraits::Fail; + } else if (recursion == "ignore") { + result.Recursion = ERecursionTraits::Ignore; + } else if (recursion == "bytes") { + result.Recursion = ERecursionTraits::Bytes; + } else { + ythrow yexception() << "unsupported recursion trait " + << recursion; + } + + return result; + } else { + ythrow yexception() << "can't parse json metadata"; + } + } + + default: + ythrow yexception() << "invalid control char " + << TStringBuf(config.data(), 1); + } +} diff --git a/library/cpp/protobuf/yql/descriptor.h b/library/cpp/protobuf/yql/descriptor.h new file mode 100644 index 0000000000..bbed6850ec --- /dev/null +++ b/library/cpp/protobuf/yql/descriptor.h @@ -0,0 +1,161 @@ +#pragma once + +#include <google/protobuf/descriptor.h> +#include <google/protobuf/dynamic_message.h> +#include <google/protobuf/descriptor.pb.h> + +#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h> + +#include <util/generic/ptr.h> + +enum EProtoFormat { + PF_PROTOBIN = 0, + PF_PROTOTEXT = 1, + PF_JSON = 2, +}; + +enum class EEnumFormat { + Number = 0, + Name = 1, + FullName = 2, +}; + +enum class ERecursionTraits { + //! Падать, если на входе имеется рекурсивно определённый тип. + Fail = 0, + //! Игнорировать все поля с рекурсивно определённым типом. + Ignore = 1, + //! Возвращать поля с рекурсивным типом в виде сериализованной строки + Bytes = 2, +}; + +struct TProtoTypeConfig { + //! Имя сообщения. + TString MessageName; + //! Сериализованные метаданные. + TString Metadata; + //! Количество байт, которые надо отступить + //! от начала каждого блока данных. + ui32 SkipBytes = 0; + //! Формат сериализации proto-сообщений. + EProtoFormat ProtoFormat = PF_PROTOBIN; + //! Формат представление значений Enum. + EEnumFormat EnumFormat = EEnumFormat::Number; + //! Способ интерпретации рекурсивно определённых типов. + ERecursionTraits Recursion = ERecursionTraits::Fail; + //! Выдать бинарными строками, если SERIALIZATION_PROTOBUF. + //! Если SERIALIZATION_YT, то для рекурсивных структур поведение зависит от Recursion. + bool YtMode = false; + //! Заворачивать ли списочные типы в Optional. + bool OptionalLists = false; + //! Заполнять ли пустые Optional типы дефолтным значением (только для proto3). + bool SyntaxAware = false; +}; + +struct TProtoTypeConfigOptions { + //! Количество байт, которые надо отступить + //! от начала каждого блока данных. + ui32 SkipBytes = 0; + //! Формат сериализации proto-сообщений. + EProtoFormat ProtoFormat = PF_PROTOBIN; + //! Формат представление значений Enum. + EEnumFormat EnumFormat = EEnumFormat::Number; + //! Способ интерпретации рекурсивно определённых типов. + ERecursionTraits Recursion = ERecursionTraits::Fail; + //! Выдать бинарными строками, если SERIALIZATION_PROTOBUF. + //! Если SERIALIZATION_YT, то для рекурсивных структур поведение зависит от Recursion. + bool YtMode = false; + //! Заворачивать ли списочные типы в Optional. + bool OptionalLists = false; + //! Заполнять ли пустые Optional типы дефолтным значением (только для proto3). + bool SyntaxAware = false; + + TProtoTypeConfigOptions& SetProtoFormat(EProtoFormat value) { + ProtoFormat = value; + return *this; + } + + TProtoTypeConfigOptions& SetSkipBytes(ui32 value) { + SkipBytes = value; + return *this; + } + + TProtoTypeConfigOptions& SetEnumFormat(EEnumFormat value) { + EnumFormat = value; + return *this; + } + + TProtoTypeConfigOptions& SetRecursionTraits(ERecursionTraits value) { + Recursion = value; + return *this; + } + + TProtoTypeConfigOptions& SetYtMode(bool value) { + YtMode = value; + return *this; + } + + TProtoTypeConfigOptions& SetOptionalLists(bool value) { + OptionalLists = value; + return *this; + } + + TProtoTypeConfigOptions& SetSyntaxAware(bool value) { + SyntaxAware = value; + return *this; + } +}; + +TString GenerateProtobufTypeConfig( + const NProtoBuf::Descriptor* descriptor, + const TProtoTypeConfigOptions& options = TProtoTypeConfigOptions()); + +template <typename T> +inline TString GenerateProtobufTypeConfig( + const TProtoTypeConfigOptions& options = TProtoTypeConfigOptions()) { + return GenerateProtobufTypeConfig(T::descriptor(), options); +} + +TProtoTypeConfig ParseTypeConfig(const TStringBuf& config); + +using TDynamicInfoRef = TIntrusivePtr<class TDynamicInfo>; + +class TDynamicInfo: public TSimpleRefCount<TDynamicInfo> { +public: + TDynamicInfo(TDynamicPrototypePtr); + ~TDynamicInfo(); + + static TDynamicInfoRef Create(const TStringBuf& typeConfig); + + const NProtoBuf::Descriptor* Descriptor() const; + + //! Текущий формат представление значений Enum. + EEnumFormat GetEnumFormat() const; + + //! Текущий способ интерпретации рекурсивно определённых типов. + ERecursionTraits GetRecursionTraits() const; + + bool GetYtMode() const; + + bool GetOptionalLists() const; + + bool GetSyntaxAware() const; + + TAutoPtr<NProtoBuf::Message> MakeProto(); + + //! \param data сериализованное protobuf-сообщение. + TAutoPtr<NProtoBuf::Message> Parse(const TStringBuf& data); + + //! \param proto protobuf-сообщение. + TString Serialize(const NProtoBuf::Message& proto); + +private: + TDynamicPrototypePtr DynamicPrototype; + EEnumFormat EnumFormat_; + EProtoFormat ProtoFormat_; + ERecursionTraits Recursion_; + bool YtMode_; + ui32 SkipBytes_; + bool OptionalLists_; + bool SyntaxAware_; +}; diff --git a/library/cpp/protobuf/yql/ya.make b/library/cpp/protobuf/yql/ya.make new file mode 100644 index 0000000000..29c21da402 --- /dev/null +++ b/library/cpp/protobuf/yql/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + descriptor.cpp +) + +PEERDIR( + contrib/libs/protobuf + library/cpp/json + library/cpp/protobuf/dynamic_prototype + library/cpp/protobuf/json + library/cpp/string_utils/base64 +) + +END() |