aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp
diff options
context:
space:
mode:
authoralexv-smirnov <alex@ydb.tech>2023-09-19 12:14:42 +0300
committeralexv-smirnov <alex@ydb.tech>2023-09-19 12:43:02 +0300
commit4662db373a1785a836134a4b2342e8471342bdda (patch)
tree2df6840dc71039dbcb4192a4b6b2edbf5f26248d /library/cpp
parent0364421e62126bcf971c82084d3c923be85c77c7 (diff)
downloadydb-4662db373a1785a836134a4b2342e8471342bdda.tar.gz
Clone protobuf_udf to ydb/library
Diffstat (limited to 'library/cpp')
-rw-r--r--library/cpp/protobuf/CMakeLists.txt2
-rw-r--r--library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt19
-rw-r--r--library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt20
-rw-r--r--library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt20
-rw-r--r--library/cpp/protobuf/dynamic_prototype/CMakeLists.txt17
-rw-r--r--library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt19
-rw-r--r--library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp63
-rw-r--r--library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h43
-rw-r--r--library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp33
-rw-r--r--library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h5
-rw-r--r--library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp27
-rw-r--r--library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto4
-rw-r--r--library/cpp/protobuf/dynamic_prototype/ut/my_message.proto6
-rw-r--r--library/cpp/protobuf/dynamic_prototype/ut/ya.make13
-rw-r--r--library/cpp/protobuf/dynamic_prototype/ya.make16
-rw-r--r--library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt22
-rw-r--r--library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt23
-rw-r--r--library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt23
-rw-r--r--library/cpp/protobuf/yql/CMakeLists.txt17
-rw-r--r--library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt22
-rw-r--r--library/cpp/protobuf/yql/README.md26
-rw-r--r--library/cpp/protobuf/yql/descriptor.cpp314
-rw-r--r--library/cpp/protobuf/yql/descriptor.h161
-rw-r--r--library/cpp/protobuf/yql/ya.make15
24 files changed, 930 insertions, 0 deletions
diff --git a/library/cpp/protobuf/CMakeLists.txt b/library/cpp/protobuf/CMakeLists.txt
index cfd4a1733f..ab3580519b 100644
--- a/library/cpp/protobuf/CMakeLists.txt
+++ b/library/cpp/protobuf/CMakeLists.txt
@@ -6,6 +6,8 @@
# original buildsystem will not be accepted.
+add_subdirectory(dynamic_prototype)
add_subdirectory(interop)
add_subdirectory(json)
add_subdirectory(util)
+add_subdirectory(yql)
diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..f17724c5b1
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-dynamic_prototype)
+target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(cpp-protobuf-dynamic_prototype PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
+)
diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..1d956f9a86
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-dynamic_prototype)
+target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(cpp-protobuf-dynamic_prototype PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
+)
diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..1d956f9a86
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-dynamic_prototype)
+target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(cpp-protobuf-dynamic_prototype PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
+)
diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt b/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..f17724c5b1
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-dynamic_prototype)
+target_link_libraries(cpp-protobuf-dynamic_prototype PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+)
+target_sources(cpp-protobuf-dynamic_prototype PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
+)
diff --git a/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
new file mode 100644
index 0000000000..692331fbbe
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.cpp
@@ -0,0 +1,63 @@
+#include "dynamic_prototype.h"
+
+#include <util/generic/yexception.h>
+
+TDynamicPrototype::TDynamicMessage::TDynamicMessage(THolder<NProtoBuf::Message> message, TIntrusivePtr<TDynamicPrototype> prototype)
+ : Prototype(std::move(prototype))
+ , Message(std::move(message))
+{
+}
+
+NProtoBuf::Message& TDynamicPrototype::TDynamicMessage::operator*() {
+ return *Message;
+}
+
+NProtoBuf::Message* TDynamicPrototype::TDynamicMessage::operator->() {
+ return Get();
+}
+
+NProtoBuf::Message* TDynamicPrototype::TDynamicMessage::Get() {
+ return Message.Get();
+}
+
+TDynamicPrototypePtr TDynamicPrototype::Create(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack) {
+ return new TDynamicPrototype(fds, messageName, yqlHack);
+}
+
+TDynamicPrototype::TDynamicPrototype(const NProtoBuf::FileDescriptorSet& fileDescriptorSet, const TString& messageName, bool yqlHack)
+{
+ const NProtoBuf::FileDescriptor* fileDescriptor;
+
+ for (int i = 0; i < fileDescriptorSet.file_size(); ++i) {
+ fileDescriptor = Pool.BuildFile(fileDescriptorSet.file(i));
+ if (fileDescriptor == nullptr) {
+ ythrow yexception() << "can't build file descriptor";
+ }
+ }
+
+ Descriptor = Pool.FindMessageTypeByName(messageName);
+
+ // Первоначальный вариант поведения, когда тип определялся
+ // по имени сообщения верхнего уровня в заданном файле.
+ if (yqlHack && !Descriptor) {
+ Descriptor = fileDescriptor->FindMessageTypeByName(messageName);
+ }
+
+ if (!Descriptor) {
+ ythrow yexception() << "no descriptor for " << messageName;
+ }
+
+ Prototype = Factory.GetPrototype(Descriptor);
+}
+
+TDynamicPrototype::TDynamicMessage TDynamicPrototype::Create() {
+ return TDynamicMessage(CreateUnsafe(), this);
+}
+
+THolder<NProtoBuf::Message> TDynamicPrototype::CreateUnsafe() const {
+ return THolder<NProtoBuf::Message>(Prototype->New());
+}
+
+const NProtoBuf::Descriptor* TDynamicPrototype::GetDescriptor() const {
+ return Descriptor;
+}
diff --git a/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h
new file mode 100644
index 0000000000..f2afcd5f86
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <google/protobuf/dynamic_message.h>
+#include <google/protobuf/descriptor.pb.h>
+
+#include <util/generic/ptr.h>
+
+class TDynamicPrototype;
+
+using TDynamicPrototypePtr = TIntrusivePtr<TDynamicPrototype>;
+
+class TDynamicPrototype : public TThrRefBase {
+public:
+ class TDynamicMessage {
+ public:
+ TDynamicMessage() = default;
+ TDynamicMessage(THolder<NProtoBuf::Message> message, TIntrusivePtr<TDynamicPrototype> prototype);
+ NProtoBuf::Message& operator*();
+ NProtoBuf::Message* operator->();
+ NProtoBuf::Message* Get();
+ private:
+ TIntrusivePtr<TDynamicPrototype> Prototype;
+ THolder<NProtoBuf::Message> Message;
+ };
+
+ static TDynamicPrototypePtr Create(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack = false);
+
+ // https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.dynamic_message#DynamicMessageFactory
+ // Must outlive created messages
+ TDynamicMessage Create();
+
+ THolder<NProtoBuf::Message> CreateUnsafe() const;
+
+ const NProtoBuf::Descriptor* GetDescriptor() const;
+
+private:
+ TDynamicPrototype(const NProtoBuf::FileDescriptorSet& fds, const TString& messageName, bool yqlHack);
+
+ NProtoBuf::DescriptorPool Pool;
+ const NProtoBuf::Descriptor* Descriptor;
+ NProtoBuf::DynamicMessageFactory Factory;
+ const NProtoBuf::Message* Prototype;
+};
diff --git a/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
new file mode 100644
index 0000000000..b3fc68e545
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.cpp
@@ -0,0 +1,33 @@
+#include "generate_file_descriptor_set.h"
+
+#include <util/generic/queue.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+
+using namespace NProtoBuf;
+
+FileDescriptorSet GenerateFileDescriptorSet(const Descriptor* desc) {
+ TVector<const FileDescriptor*> flat;
+ TQueue<const FileDescriptor*> descriptors;
+
+ for (descriptors.push(desc->file()); !descriptors.empty(); descriptors.pop()) {
+ const FileDescriptor* file = descriptors.front();
+
+ for (int i = 0; i < file->dependency_count(); ++i) {
+ descriptors.push(file->dependency(i));
+ }
+
+ flat.push_back(file);
+ }
+
+ FileDescriptorSet result;
+ TSet<TString> visited;
+
+ for (auto ri = flat.rbegin(); ri != flat.rend(); ++ri) {
+ if (visited.insert((*ri)->name()).second == true) {
+ (*ri)->CopyTo(result.add_file());
+ }
+ }
+
+ return result;
+}
diff --git a/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h
new file mode 100644
index 0000000000..8e1fb33a08
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include <google/protobuf/descriptor.pb.h>
+
+NProtoBuf::FileDescriptorSet GenerateFileDescriptorSet(const NProtoBuf::Descriptor* desc);
diff --git a/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp b/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp
new file mode 100644
index 0000000000..bf3bfca4aa
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/ut/dynamic_prototype_ut.cpp
@@ -0,0 +1,27 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h>
+#include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h>
+#include <library/cpp/protobuf/dynamic_prototype/ut/my_message.pb.h>
+
+
+Y_UNIT_TEST_SUITE(TDynamicPrototype) {
+ Y_UNIT_TEST(Create) {
+ const auto* descriptor = TMyMessage().GetDescriptor();
+ const auto& fileDescriptorSet = GenerateFileDescriptorSet(descriptor);
+
+ auto prototype = TDynamicPrototype::Create(fileDescriptorSet, "TMyMessage");
+ auto myMessage = prototype->Create();
+
+ TMyMessage reference;
+ reference.SetFloat(12.1);
+ reference.MutableInnerMessage()->SetInt32(42);
+ reference.MutableInnerMessage()->SetString("string");
+
+ TString serialized;
+ Y_PROTOBUF_SUPPRESS_NODISCARD reference.SerializeToString(&serialized);
+ Y_PROTOBUF_SUPPRESS_NODISCARD myMessage->ParseFromString(serialized);
+
+ UNIT_ASSERT_STRINGS_EQUAL(reference.DebugString(), myMessage->DebugString());
+ }
+}
diff --git a/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto b/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto
new file mode 100644
index 0000000000..d893d7b4d9
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto
@@ -0,0 +1,4 @@
+message TMyInnerMessage {
+ required int32 Int32 = 1;
+ required string String = 2;
+}
diff --git a/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto b/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto
new file mode 100644
index 0000000000..c548985530
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/ut/my_message.proto
@@ -0,0 +1,6 @@
+import "library/cpp/protobuf/dynamic_prototype/ut/my_inner_message.proto";
+
+message TMyMessage {
+ required TMyInnerMessage InnerMessage = 1;
+ required float Float = 2;
+} \ No newline at end of file
diff --git a/library/cpp/protobuf/dynamic_prototype/ut/ya.make b/library/cpp/protobuf/dynamic_prototype/ut/ya.make
new file mode 100644
index 0000000000..1bb570f959
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/ut/ya.make
@@ -0,0 +1,13 @@
+UNITTEST()
+
+SRCS(
+ dynamic_prototype_ut.cpp
+ my_inner_message.proto
+ my_message.proto
+)
+
+PEERDIR(
+ library/cpp/protobuf/dynamic_prototype
+)
+
+END()
diff --git a/library/cpp/protobuf/dynamic_prototype/ya.make b/library/cpp/protobuf/dynamic_prototype/ya.make
new file mode 100644
index 0000000000..3fd883b6a2
--- /dev/null
+++ b/library/cpp/protobuf/dynamic_prototype/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+SRCS(
+ dynamic_prototype.cpp
+ generate_file_descriptor_set.cpp
+)
+
+PEERDIR(
+ contrib/libs/protobuf
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..112f6e94e7
--- /dev/null
+++ b/library/cpp/protobuf/yql/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-yql)
+target_link_libraries(cpp-protobuf-yql PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ library-cpp-json
+ cpp-protobuf-dynamic_prototype
+ cpp-protobuf-json
+ cpp-string_utils-base64
+)
+target_sources(cpp-protobuf-yql PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp
+)
diff --git a/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt b/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..de09662231
--- /dev/null
+++ b/library/cpp/protobuf/yql/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-yql)
+target_link_libraries(cpp-protobuf-yql PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ library-cpp-json
+ cpp-protobuf-dynamic_prototype
+ cpp-protobuf-json
+ cpp-string_utils-base64
+)
+target_sources(cpp-protobuf-yql PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp
+)
diff --git a/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..de09662231
--- /dev/null
+++ b/library/cpp/protobuf/yql/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-yql)
+target_link_libraries(cpp-protobuf-yql PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ library-cpp-json
+ cpp-protobuf-dynamic_prototype
+ cpp-protobuf-json
+ cpp-string_utils-base64
+)
+target_sources(cpp-protobuf-yql PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp
+)
diff --git a/library/cpp/protobuf/yql/CMakeLists.txt b/library/cpp/protobuf/yql/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/protobuf/yql/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt b/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..112f6e94e7
--- /dev/null
+++ b/library/cpp/protobuf/yql/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-protobuf-yql)
+target_link_libraries(cpp-protobuf-yql PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ contrib-libs-protobuf
+ library-cpp-json
+ cpp-protobuf-dynamic_prototype
+ cpp-protobuf-json
+ cpp-string_utils-base64
+)
+target_sources(cpp-protobuf-yql PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/protobuf/yql/descriptor.cpp
+)
diff --git a/library/cpp/protobuf/yql/README.md b/library/cpp/protobuf/yql/README.md
new file mode 100644
index 0000000000..622bfce841
--- /dev/null
+++ b/library/cpp/protobuf/yql/README.md
@@ -0,0 +1,26 @@
+Функции данной библиотеки, позволяют генерировать метаинформацию, необходимую YQL для того, чтобы работать с содержимым protobuf-сообщения как со структурой.
+
+Пример задания атрибута для таблицы в YT:
+```c++
+#include <proto/person.pb.h>
+#include <library/cpp/protobuf/yql/descriptor.h>
+
+auto client = NYT::CreateClient(ServerName);
+client->Create(OutputPath, NT_TABLE,
+ TCreateOptions().Attributes(NYT::TNode()
+ ("_yql_proto_field_Data", GenerateProtobufTypeConfig<TPersonProto>())
+);
+```
+Далее, в YQL можно будет написать следующий запрос:
+```sql
+SELECT t.Data.Name, t.Data.Age, FROM [table] AS t;
+```
+
+Для того, чтобы работать с protobuf-сообщениями, сохранёнными в разных представлениях есть специальные опции. На данный момент поддерживается три формата представления данных - binary-protobuf, text-protbuf и json.
+
+Пример использования опций:
+```c++
+GenerateProtobufTypeConfig<TPersonProto>(
+ TProtoTypeConfigOptions().SetProtoFormat(PF_PROTOTEXT))
+);
+```
diff --git a/library/cpp/protobuf/yql/descriptor.cpp b/library/cpp/protobuf/yql/descriptor.cpp
new file mode 100644
index 0000000000..2d5a154fc1
--- /dev/null
+++ b/library/cpp/protobuf/yql/descriptor.cpp
@@ -0,0 +1,314 @@
+#include "descriptor.h"
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_writer.h>
+#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h>
+#include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h>
+#include <library/cpp/protobuf/json/json2proto.h>
+#include <library/cpp/protobuf/json/proto2json.h>
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/queue.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+#include <util/stream/mem.h>
+#include <util/stream/str.h>
+#include <util/stream/zlib.h>
+#include <util/string/cast.h>
+
+#include <google/protobuf/text_format.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+using namespace NProtoBuf;
+
+static TString SerializeFileDescriptorSet(const FileDescriptorSet& proto) {
+ const auto size = proto.ByteSize();
+ TTempBuf data(size);
+ proto.SerializeWithCachedSizesToArray((ui8*)data.Data());
+
+ TStringStream str;
+ {
+ TZLibCompress comp(&str, ZLib::GZip);
+ comp.Write(data.Data(), size);
+ }
+ return str.Str();
+}
+
+static bool ParseFileDescriptorSet(const TStringBuf& data, FileDescriptorSet* proto) {
+ TMemoryInput input(data.data(), data.size());
+ TString buf = TZLibDecompress(&input).ReadAll();
+
+ if (!proto->ParseFromArray(buf.data(), buf.size())) {
+ return false;
+ }
+ return true;
+}
+
+TDynamicInfo::TDynamicInfo(TDynamicPrototypePtr dynamicPrototype)
+ : DynamicPrototype(dynamicPrototype)
+ , SkipBytes_(0)
+{
+}
+
+TDynamicInfo::~TDynamicInfo() {
+}
+
+TDynamicInfoRef TDynamicInfo::Create(const TStringBuf& typeConfig) {
+ auto data = ParseTypeConfig(typeConfig);
+ const TString& meta = Base64Decode(data.Metadata);
+ const TString& name = data.MessageName;
+ FileDescriptorSet set;
+
+ if (!ParseFileDescriptorSet(meta, &set)) {
+ ythrow yexception() << "can't parse metadata";
+ }
+
+ auto info = MakeIntrusive<TDynamicInfo>(TDynamicPrototype::Create(set, name, true));
+
+ info->EnumFormat_ = data.EnumFormat;
+ info->ProtoFormat_ = data.ProtoFormat;
+ info->Recursion_ = data.Recursion;
+ info->YtMode_ = data.YtMode;
+ info->SkipBytes_ = data.SkipBytes;
+ info->OptionalLists_ = data.OptionalLists;
+ info->SyntaxAware_ = data.SyntaxAware;
+ return info;
+}
+
+const Descriptor* TDynamicInfo::Descriptor() const {
+ return DynamicPrototype->GetDescriptor();
+}
+
+EEnumFormat TDynamicInfo::GetEnumFormat() const {
+ return EnumFormat_;
+}
+
+ERecursionTraits TDynamicInfo::GetRecursionTraits() const {
+ return Recursion_;
+}
+
+bool TDynamicInfo::GetYtMode() const {
+ return YtMode_;
+}
+
+bool TDynamicInfo::GetOptionalLists() const {
+ return OptionalLists_;
+}
+
+bool TDynamicInfo::GetSyntaxAware() const {
+ return SyntaxAware_;
+}
+
+TAutoPtr<Message> TDynamicInfo::MakeProto() {
+ return DynamicPrototype->CreateUnsafe();
+}
+
+TAutoPtr<Message> TDynamicInfo::Parse(const TStringBuf& data) {
+ auto mut = MakeProto();
+ TStringBuf tmp(data);
+
+ if (SkipBytes_) {
+ tmp = TStringBuf(tmp.data() + SkipBytes_, tmp.size() - SkipBytes_);
+ }
+
+ switch (ProtoFormat_) {
+ case PF_PROTOBIN: {
+ if (!mut->ParseFromArray(tmp.data(), tmp.size())) {
+ ythrow yexception() << "can't parse protobin message";
+ }
+ break;
+ }
+ case PF_PROTOTEXT: {
+ io::ArrayInputStream si(tmp.data(), tmp.size());
+ if (!TextFormat::Parse(&si, mut.Get())) {
+ ythrow yexception() << "can't parse prototext message";
+ }
+ break;
+ }
+ case PF_JSON: {
+ NJson::TJsonValue value;
+
+ if (NJson::ReadJsonFastTree(tmp, &value)) {
+ NProtobufJson::Json2Proto(value, *mut);
+ } else {
+ ythrow yexception() << "can't parse json value";
+ }
+ break;
+ }
+ }
+
+ return mut;
+}
+
+TString TDynamicInfo::Serialize(const Message& proto) {
+ TString result;
+ switch (ProtoFormat_) {
+ case PF_PROTOBIN: {
+ result.ReserveAndResize(proto.ByteSize());
+ if (!proto.SerializeToArray(result.begin(), result.size())) {
+ ythrow yexception() << "can't serialize protobin message";
+ }
+ break;
+ }
+ case PF_PROTOTEXT: {
+ if (!TextFormat::PrintToString(proto, &result)) {
+ ythrow yexception() << "can't serialize prototext message";
+ }
+ break;
+ }
+ case PF_JSON: {
+ NJson::TJsonValue value;
+ NProtobufJson::Proto2Json(proto, value);
+ result = NJson::WriteJson(value);
+ break;
+ }
+ }
+ return result;
+}
+
+TString GenerateProtobufTypeConfig(
+ const Descriptor* descriptor,
+ const TProtoTypeConfigOptions& options) {
+ NJson::TJsonValue ret(NJson::JSON_MAP);
+
+ ret["name"] = descriptor->full_name();
+ ret["meta"] = Base64Encode(
+ SerializeFileDescriptorSet(GenerateFileDescriptorSet(descriptor)));
+
+ if (options.SkipBytes > 0) {
+ ret["skip"] = options.SkipBytes;
+ }
+
+ switch (options.ProtoFormat) {
+ case PF_PROTOBIN:
+ break;
+ case PF_PROTOTEXT:
+ ret["format"] = "prototext";
+ break;
+ case PF_JSON:
+ ret["format"] = "json";
+ break;
+ }
+
+ if (!options.OptionalLists) {
+ ret["lists"]["optional"] = false;
+ }
+
+ if (options.SyntaxAware) {
+ ret["syntax"]["aware"] = options.SyntaxAware;
+ }
+
+ switch (options.EnumFormat) {
+ case EEnumFormat::Number:
+ break;
+ case EEnumFormat::Name:
+ ret["view"]["enum"] = "name";
+ break;
+ case EEnumFormat::FullName:
+ ret["view"]["enum"] = "full_name";
+ break;
+ }
+
+ switch (options.Recursion) {
+ case ERecursionTraits::Fail:
+ break;
+ case ERecursionTraits::Ignore:
+ ret["view"]["recursion"] = "ignore";
+ break;
+ case ERecursionTraits::Bytes:
+ ret["view"]["recursion"] = "bytes";
+ break;
+ }
+
+ if (options.YtMode) {
+ ret["view"]["yt_mode"] = true;
+ }
+
+ return NJson::WriteJson(ret, false);
+}
+
+TProtoTypeConfig ParseTypeConfig(const TStringBuf& config) {
+ if (config.empty()) {
+ ythrow yexception() << "empty metadata";
+ }
+
+ switch (config[0]) {
+ case '#': {
+ auto plus = config.find('+');
+
+ if (config[0] != '#') {
+ ythrow yexception() << "unknown version of metadata format";
+ }
+ if (plus == TStringBuf::npos) {
+ ythrow yexception() << "invalid metadata";
+ }
+
+ TProtoTypeConfig result;
+
+ result.MessageName = TStringBuf(config.begin() + 1, plus - 1);
+ result.Metadata = TStringBuf(config.begin() + 1 + plus, config.size() - plus - 1);
+ result.SkipBytes = 0;
+
+ return result;
+ }
+
+ case '{': {
+ NJson::TJsonValue value;
+
+ if (NJson::ReadJsonFastTree(config, &value)) {
+ TProtoTypeConfig result;
+ TString protoFormat = value["format"].GetStringSafe("protobin");
+ TString enumFormat = value["view"]["enum"].GetStringSafe("number");
+ TString recursion = value["view"]["recursion"].GetStringSafe("fail");
+
+ result.MessageName = value["name"].GetString();
+ result.Metadata = value["meta"].GetString();
+ result.SkipBytes = value["skip"].GetIntegerSafe(0);
+ result.OptionalLists = value["lists"]["optional"].GetBooleanSafe(true);
+ result.SyntaxAware = value["syntax"]["aware"].GetBooleanSafe(false);
+ result.YtMode = value["view"]["yt_mode"].GetBooleanSafe(false);
+
+ if (protoFormat == "protobin") {
+ result.ProtoFormat = PF_PROTOBIN;
+ } else if (protoFormat == "prototext") {
+ result.ProtoFormat = PF_PROTOTEXT;
+ } else if (protoFormat == "json") {
+ result.ProtoFormat = PF_JSON;
+ } else {
+ ythrow yexception() << "unsupported format " << protoFormat;
+ }
+
+ if (enumFormat == "number") {
+ result.EnumFormat = EEnumFormat::Number;
+ } else if (enumFormat == "name") {
+ result.EnumFormat = EEnumFormat::Name;
+ } else if (enumFormat == "full_name") {
+ result.EnumFormat = EEnumFormat::FullName;
+ } else {
+ ythrow yexception() << "unsupported enum representation "
+ << enumFormat;
+ }
+
+ if (recursion == "fail") {
+ result.Recursion = ERecursionTraits::Fail;
+ } else if (recursion == "ignore") {
+ result.Recursion = ERecursionTraits::Ignore;
+ } else if (recursion == "bytes") {
+ result.Recursion = ERecursionTraits::Bytes;
+ } else {
+ ythrow yexception() << "unsupported recursion trait "
+ << recursion;
+ }
+
+ return result;
+ } else {
+ ythrow yexception() << "can't parse json metadata";
+ }
+ }
+
+ default:
+ ythrow yexception() << "invalid control char "
+ << TStringBuf(config.data(), 1);
+ }
+}
diff --git a/library/cpp/protobuf/yql/descriptor.h b/library/cpp/protobuf/yql/descriptor.h
new file mode 100644
index 0000000000..bbed6850ec
--- /dev/null
+++ b/library/cpp/protobuf/yql/descriptor.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/dynamic_message.h>
+#include <google/protobuf/descriptor.pb.h>
+
+#include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h>
+
+#include <util/generic/ptr.h>
+
+enum EProtoFormat {
+ PF_PROTOBIN = 0,
+ PF_PROTOTEXT = 1,
+ PF_JSON = 2,
+};
+
+enum class EEnumFormat {
+ Number = 0,
+ Name = 1,
+ FullName = 2,
+};
+
+enum class ERecursionTraits {
+ //! Падать, если на входе имеется рекурсивно определённый тип.
+ Fail = 0,
+ //! Игнорировать все поля с рекурсивно определённым типом.
+ Ignore = 1,
+ //! Возвращать поля с рекурсивным типом в виде сериализованной строки
+ Bytes = 2,
+};
+
+struct TProtoTypeConfig {
+ //! Имя сообщения.
+ TString MessageName;
+ //! Сериализованные метаданные.
+ TString Metadata;
+ //! Количество байт, которые надо отступить
+ //! от начала каждого блока данных.
+ ui32 SkipBytes = 0;
+ //! Формат сериализации proto-сообщений.
+ EProtoFormat ProtoFormat = PF_PROTOBIN;
+ //! Формат представление значений Enum.
+ EEnumFormat EnumFormat = EEnumFormat::Number;
+ //! Способ интерпретации рекурсивно определённых типов.
+ ERecursionTraits Recursion = ERecursionTraits::Fail;
+ //! Выдать бинарными строками, если SERIALIZATION_PROTOBUF.
+ //! Если SERIALIZATION_YT, то для рекурсивных структур поведение зависит от Recursion.
+ bool YtMode = false;
+ //! Заворачивать ли списочные типы в Optional.
+ bool OptionalLists = false;
+ //! Заполнять ли пустые Optional типы дефолтным значением (только для proto3).
+ bool SyntaxAware = false;
+};
+
+struct TProtoTypeConfigOptions {
+ //! Количество байт, которые надо отступить
+ //! от начала каждого блока данных.
+ ui32 SkipBytes = 0;
+ //! Формат сериализации proto-сообщений.
+ EProtoFormat ProtoFormat = PF_PROTOBIN;
+ //! Формат представление значений Enum.
+ EEnumFormat EnumFormat = EEnumFormat::Number;
+ //! Способ интерпретации рекурсивно определённых типов.
+ ERecursionTraits Recursion = ERecursionTraits::Fail;
+ //! Выдать бинарными строками, если SERIALIZATION_PROTOBUF.
+ //! Если SERIALIZATION_YT, то для рекурсивных структур поведение зависит от Recursion.
+ bool YtMode = false;
+ //! Заворачивать ли списочные типы в Optional.
+ bool OptionalLists = false;
+ //! Заполнять ли пустые Optional типы дефолтным значением (только для proto3).
+ bool SyntaxAware = false;
+
+ TProtoTypeConfigOptions& SetProtoFormat(EProtoFormat value) {
+ ProtoFormat = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetSkipBytes(ui32 value) {
+ SkipBytes = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetEnumFormat(EEnumFormat value) {
+ EnumFormat = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetRecursionTraits(ERecursionTraits value) {
+ Recursion = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetYtMode(bool value) {
+ YtMode = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetOptionalLists(bool value) {
+ OptionalLists = value;
+ return *this;
+ }
+
+ TProtoTypeConfigOptions& SetSyntaxAware(bool value) {
+ SyntaxAware = value;
+ return *this;
+ }
+};
+
+TString GenerateProtobufTypeConfig(
+ const NProtoBuf::Descriptor* descriptor,
+ const TProtoTypeConfigOptions& options = TProtoTypeConfigOptions());
+
+template <typename T>
+inline TString GenerateProtobufTypeConfig(
+ const TProtoTypeConfigOptions& options = TProtoTypeConfigOptions()) {
+ return GenerateProtobufTypeConfig(T::descriptor(), options);
+}
+
+TProtoTypeConfig ParseTypeConfig(const TStringBuf& config);
+
+using TDynamicInfoRef = TIntrusivePtr<class TDynamicInfo>;
+
+class TDynamicInfo: public TSimpleRefCount<TDynamicInfo> {
+public:
+ TDynamicInfo(TDynamicPrototypePtr);
+ ~TDynamicInfo();
+
+ static TDynamicInfoRef Create(const TStringBuf& typeConfig);
+
+ const NProtoBuf::Descriptor* Descriptor() const;
+
+ //! Текущий формат представление значений Enum.
+ EEnumFormat GetEnumFormat() const;
+
+ //! Текущий способ интерпретации рекурсивно определённых типов.
+ ERecursionTraits GetRecursionTraits() const;
+
+ bool GetYtMode() const;
+
+ bool GetOptionalLists() const;
+
+ bool GetSyntaxAware() const;
+
+ TAutoPtr<NProtoBuf::Message> MakeProto();
+
+ //! \param data сериализованное protobuf-сообщение.
+ TAutoPtr<NProtoBuf::Message> Parse(const TStringBuf& data);
+
+ //! \param proto protobuf-сообщение.
+ TString Serialize(const NProtoBuf::Message& proto);
+
+private:
+ TDynamicPrototypePtr DynamicPrototype;
+ EEnumFormat EnumFormat_;
+ EProtoFormat ProtoFormat_;
+ ERecursionTraits Recursion_;
+ bool YtMode_;
+ ui32 SkipBytes_;
+ bool OptionalLists_;
+ bool SyntaxAware_;
+};
diff --git a/library/cpp/protobuf/yql/ya.make b/library/cpp/protobuf/yql/ya.make
new file mode 100644
index 0000000000..29c21da402
--- /dev/null
+++ b/library/cpp/protobuf/yql/ya.make
@@ -0,0 +1,15 @@
+LIBRARY()
+
+SRCS(
+ descriptor.cpp
+)
+
+PEERDIR(
+ contrib/libs/protobuf
+ library/cpp/json
+ library/cpp/protobuf/dynamic_prototype
+ library/cpp/protobuf/json
+ library/cpp/string_utils/base64
+)
+
+END()