aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorinnokentii <innokentii@yandex-team.com>2023-09-29 00:02:43 +0300
committerinnokentii <innokentii@yandex-team.com>2023-09-29 00:22:17 +0300
commit95fac3fb8d054fb4f0b538d358e09bbadcffbc91 (patch)
tree78f954781759681d6793710cca8ea2e407bebbbd
parentaf047ff0da562d2220c693343306c4833e875040 (diff)
downloadydb-95fac3fb8d054fb4f0b538d358e09bbadcffbc91.tar.gz
Add basic unknown fields collector
add basic unknown fields collector
-rw-r--r--library/cpp/protobuf/json/json2proto.cpp46
-rw-r--r--library/cpp/protobuf/json/json2proto.h10
-rw-r--r--library/cpp/protobuf/json/unknown_fields_collector.h29
-rw-r--r--library/cpp/protobuf/json/ut/unknown_fields_collector_ut.cpp165
-rw-r--r--library/cpp/protobuf/json/ut/ya.make1
5 files changed, 249 insertions, 2 deletions
diff --git a/library/cpp/protobuf/json/json2proto.cpp b/library/cpp/protobuf/json/json2proto.cpp
index 3131b9779d..067e307146 100644
--- a/library/cpp/protobuf/json/json2proto.cpp
+++ b/library/cpp/protobuf/json/json2proto.cpp
@@ -275,6 +275,10 @@ Json2SingleField(const NJson::TJsonValue& json,
const NJson::TJsonValue& fieldJson = name ? json[name] : json;
+ if (name && config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnEnterMapItem(name);
+ }
+
switch (field.cpp_type()) {
JSON_TO_FIELD(CPPTYPE_INT32, field.name(), fieldJson, IsInteger, SetInt32, GetInteger);
JSON_TO_FIELD(CPPTYPE_INT64, field.name(), fieldJson, IsInteger, SetInt64, GetInteger);
@@ -312,6 +316,10 @@ Json2SingleField(const NJson::TJsonValue& json,
ythrow yexception() << "Unknown protobuf field type: "
<< static_cast<int>(field.cpp_type()) << ".";
}
+
+ if (name && config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnLeaveMapItem();
+ }
}
static void
@@ -414,6 +422,10 @@ Json2RepeatedField(const NJson::TJsonValue& json,
if (fieldJson.GetType() == NJson::JSON_UNDEFINED || fieldJson.GetType() == NJson::JSON_NULL)
return;
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnEnterMapItem(name);
+ }
+
bool isMap = fieldJson.GetType() == NJson::JSON_MAP;
if (isMap) {
if (!config.MapAsObject) {
@@ -438,7 +450,13 @@ Json2RepeatedField(const NJson::TJsonValue& json,
for (const auto& x : jsonMap) {
const TString& key = x.first;
const NJson::TJsonValue& jsonValue = x.second;
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnEnterMapItem(key);
+ }
Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection, key);
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnLeaveMapItem();
+ }
}
} else {
if (config.ReplaceRepeatedFields) {
@@ -446,17 +464,37 @@ Json2RepeatedField(const NJson::TJsonValue& json,
}
if (fieldJson.GetType() == NJson::JSON_ARRAY) {
const NJson::TJsonValue::TArray& jsonArray = fieldJson.GetArray();
+ ui64 id = 0;
for (const NJson::TJsonValue& jsonValue : jsonArray) {
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnEnterArrayItem(id);
+ }
Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection);
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnLeaveArrayItem();
+ }
+ ++id;
}
} else if (config.ValueVectorizer) {
+ ui64 id = 0;
for (const NJson::TJsonValue& jsonValue : config.ValueVectorizer(fieldJson)) {
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnEnterArrayItem(id);
+ }
Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection);
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnLeaveArrayItem();
+ }
+ ++id;
}
} else if (config.VectorizeScalars) {
Json2RepeatedFieldValue(fieldJson, proto, field, config, reflection);
}
}
+
+ if (config.UnknownFieldsCollector) {
+ config.UnknownFieldsCollector->OnLeaveMapItem();
+ }
}
namespace NProtobufJson {
@@ -480,14 +518,18 @@ namespace NProtobufJson {
}
}
- if (!config.AllowUnknownFields) {
+ if (!config.AllowUnknownFields || config.UnknownFieldsCollector) {
THashMap<TString, bool> knownFields;
for (int f = 0, endF = descriptor->field_count(); f < endF; ++f) {
const google::protobuf::FieldDescriptor* field = descriptor->field(f);
knownFields[GetFieldName(*field, config)] = 1;
}
for (const auto& f : json.GetMap()) {
- Y_ENSURE(knownFields.contains(f.first), "unknown field \"" << f.first << "\" for \"" << descriptor->full_name() << "\"");
+ const bool isFieldKnown = knownFields.contains(f.first);
+ Y_ENSURE(config.AllowUnknownFields || isFieldKnown, "unknown field \"" << f.first << "\" for \"" << descriptor->full_name() << "\"");
+ if (!isFieldKnown) {
+ config.UnknownFieldsCollector->OnUnknownField(f.first, *descriptor);
+ }
}
}
}
diff --git a/library/cpp/protobuf/json/json2proto.h b/library/cpp/protobuf/json/json2proto.h
index aedbb8affc..45b589d171 100644
--- a/library/cpp/protobuf/json/json2proto.h
+++ b/library/cpp/protobuf/json/json2proto.h
@@ -2,10 +2,12 @@
#include "string_transform.h"
#include "name_generator.h"
+#include "unknown_fields_collector.h"
#include <library/cpp/json/json_reader.h>
#include <library/cpp/json/json_value.h>
+#include <util/generic/ptr.h>
#include <util/stream/input.h>
#include <util/stream/str.h>
#include <util/stream/mem.h>
@@ -115,6 +117,11 @@ namespace NProtobufJson {
return *this;
}
+ TSelf& SetUnknownFieldsCollector(TSimpleSharedPtr<IUnknownFieldsCollector> value) { // installs the collector that will be notified about unknown JSON fields
+ UnknownFieldsCollector = std::move(value); // the config keeps the pointer; passing nullptr leaves collection disabled (the member defaults to nullptr)
+ return *this; // returned so that setters can be chained
+ }
+
FldNameMode FieldNameMode = FieldNameOriginalCase;
bool AllowUnknownFields = true;
@@ -163,6 +170,9 @@ namespace NProtobufJson {
/// Allow nonstandard conversions, e.g. google.protobuf.Duration from String
bool AllowString2TimeConversion = false;
+
+ /// Stores information about unknown fields
+ TSimpleSharedPtr<IUnknownFieldsCollector> UnknownFieldsCollector = nullptr;
};
/// @throw yexception
diff --git a/library/cpp/protobuf/json/unknown_fields_collector.h b/library/cpp/protobuf/json/unknown_fields_collector.h
new file mode 100644
index 0000000000..1e71f2164c
--- /dev/null
+++ b/library/cpp/protobuf/json/unknown_fields_collector.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace google {
+ namespace protobuf {
+ class FieldDescriptor;
+ class Descriptor;
+ }
+}
+
+namespace NProtobufJson {
+ /* The OnEnter.../OnLeave... methods are called for every field of the
+ * structure during traversal and should be used to build context.
+ * OnUnknownField is called every time a JSON field is encountered that
+ * cannot be mapped to a field of the target message.
+ */
+ struct IUnknownFieldsCollector {
+ virtual ~IUnknownFieldsCollector() = default;
+
+ virtual void OnEnterMapItem(const TString& key) = 0; // traversal descends into the value stored under `key`
+ virtual void OnLeaveMapItem() = 0; // traversal is done with the map item entered last
+
+ virtual void OnEnterArrayItem(ui64 id) = 0; // traversal descends into an array element; `id` is its index (the visible call sites count from 0)
+ virtual void OnLeaveArrayItem() = 0; // traversal is done with the array element entered last
+
+ virtual void OnUnknownField(const TString& key, const google::protobuf::Descriptor& value) = 0; // `key` is the unmatched JSON key, `value` the descriptor of the message it was checked against
+ };
+}
diff --git a/library/cpp/protobuf/json/ut/unknown_fields_collector_ut.cpp b/library/cpp/protobuf/json/ut/unknown_fields_collector_ut.cpp
new file mode 100644
index 0000000000..00a2152a68
--- /dev/null
+++ b/library/cpp/protobuf/json/ut/unknown_fields_collector_ut.cpp
@@ -0,0 +1,165 @@
+#include "json.h"
+#include "proto.h"
+#include "proto2json.h"
+
+#include <library/cpp/protobuf/json/json2proto.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/set.h>
+#include <util/generic/string.h>
+
+using namespace NProtobufJson;
+using namespace NProtobufJsonTest;
+
+struct TTestUnknownFieldsCollector : public IUnknownFieldsCollector { // test collector: records every unknown key as a '/'-joined path
+ void OnEnterMapItem(const TString& key) override {
+ CurrentPath.push_back(key); // the key becomes the next path component
+ }
+
+ void OnEnterArrayItem(ui64 id) override {
+ CurrentPath.push_back(ToString(id)); // array indices are stored as their decimal text
+ }
+
+ void OnLeaveMapItem() override {
+ CurrentPath.pop_back(); // drops the component pushed by the matching OnEnterMapItem
+ }
+
+ void OnLeaveArrayItem() override {
+ CurrentPath.pop_back(); // drops the component pushed by the matching OnEnterArrayItem
+ }
+
+ void OnUnknownField(const TString& key, const google::protobuf::Descriptor& value) override {
+ TString path; // built as "/<piece>/<piece>/.../<key>"
+ for (auto& piece : CurrentPath) {
+ path.append("/");
+ path.append(piece);
+ }
+ path.append("/");
+ path.append(key);
+ UnknownKeys.insert(std::move(path));
+ Y_UNUSED(value); // the descriptor is not needed by this test collector
+ }
+
+ TVector<TString> CurrentPath; // components of the location currently being traversed
+ TSet<TString> UnknownKeys; // full paths of all unknown fields reported so far
+};
+
+Y_UNIT_TEST_SUITE(TUnknownFieldsCollectorTest) { // each test parses JSON with extra keys and checks the paths reported to the collector
+ Y_UNIT_TEST(TestFlatOptional) {
+ TFlatOptional proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true);
+
+ Json2Proto(TStringBuf(R"({"42":42,"I32":11,"test":2,"string":"str","String":"string","obj":{"inner":{}},"arr":[1,2,3]})"), proto, cfg);
+ TSet<TString> expectedKeys = { // only the top-level unknown keys are expected; nothing below "obj" or "arr" is listed
+ {"/42"},
+ {"/arr"},
+ {"/obj"},
+ {"/string"},
+ {"/test"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty()); // every OnEnter... call must have been balanced by an OnLeave... call
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+
+ Y_UNIT_TEST(TestFlatRepeated) {
+ TFlatRepeated proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true);
+
+ Json2Proto(TStringBuf(R"({"42":42,"I32":[11,12],"test":12,"string":"str","String":["string1","string2"],"obj":{"inner":{}},"arr":[1,2,3]})"), proto, cfg);
+ TSet<TString> expectedKeys = {
+ {"/42"},
+ {"/arr"},
+ {"/obj"},
+ {"/string"},
+ {"/test"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty());
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+
+ Y_UNIT_TEST(TestCompositeOptional) {
+ TCompositeOptional proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true);
+
+ Json2Proto(TStringBuf(R"({"Part":{"42":42,"I32":11,"test":12,"string":"str","String":"string"},"string2":"str"})"), proto, cfg);
+ TSet<TString> expectedKeys = { // unknown keys inside the nested message are prefixed with its field name
+ {"/Part/42"},
+ {"/Part/string"},
+ {"/Part/test"},
+ {"/string2"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty());
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+
+ Y_UNIT_TEST(TestCompositeRepeated) {
+ TCompositeRepeated proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true);
+
+ Json2Proto(TStringBuf(R"({"Part":[)"
+ R"( {"42":42,"I32":11,"test":12,"string":"str","String":"string"},)"
+ R"( {"abc":"d"})"
+ R"(],)"
+ R"("string2":"str"})"), proto, cfg);
+ TSet<TString> expectedKeys = { // array elements contribute their index as a path component
+ {"/Part/0/42"},
+ {"/Part/0/string"},
+ {"/Part/0/test"},
+ {"/Part/1/abc"},
+ {"/string2"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty());
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+
+ Y_UNIT_TEST(TestCompleMapType) { // NOTE(review): "Comple" looks like a typo for "Complex" - confirm before renaming
+ TComplexMapType proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true);
+
+ Json2Proto(TStringBuf(R"({"42":42,)"
+ R"("Nested":[)"
+ R"( {"key":"abc","value":{"string":"string","Nested":[{"key":"def","value":{"string2":"string2"}}]}},)"
+ R"( {"key":"car","value":{"string3":"string3"}})"
+ R"(]})"), proto, cfg);
+ TSet<TString> expectedKeys = { // the map is given as an array of key/value entries, so paths contain the index and "value"
+ {"/42"},
+ {"/Nested/0/value/Nested/0/value/string2"},
+ {"/Nested/0/value/string"},
+ {"/Nested/1/value/string3"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty());
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+
+ Y_UNIT_TEST(TestCompleMapTypeMapAsObject) { // NOTE(review): "Comple" looks like a typo for "Complex" - confirm before renaming
+ TComplexMapType proto;
+ TSimpleSharedPtr<TTestUnknownFieldsCollector> collector = new TTestUnknownFieldsCollector;
+ TJson2ProtoConfig cfg;
+ cfg.SetUnknownFieldsCollector(collector).SetAllowUnknownFields(true).SetMapAsObject(true);
+
+ Json2Proto(TStringBuf(R"({"42":42,)"
+ R"("Nested":{)"
+ R"( "abc":{"string":"string","Nested":{"def":{"string2":"string2"}}},)"
+ R"( "car":{"string3":"string3"})"
+ R"(}})"), proto, cfg);
+ TSet<TString> expectedKeys = { // with MapAsObject the map key itself is the path component
+ {"/42"},
+ {"/Nested/abc/Nested/def/string2"},
+ {"/Nested/abc/string"},
+ {"/Nested/car/string3"},
+ };
+ UNIT_ASSERT(collector->CurrentPath.empty());
+ UNIT_ASSERT_VALUES_EQUAL(collector->UnknownKeys, expectedKeys);
+ }
+} // TUnknownFieldsCollectorTest
diff --git a/library/cpp/protobuf/json/ut/ya.make b/library/cpp/protobuf/json/ut/ya.make
index 0b2c954952..2f709ba539 100644
--- a/library/cpp/protobuf/json/ut/ya.make
+++ b/library/cpp/protobuf/json/ut/ya.make
@@ -10,6 +10,7 @@ SRCS(
filter_ut.proto
test.proto
util_ut.cpp
+ unknown_fields_collector_ut.cpp
)
GENERATE_ENUM_SERIALIZATION(test.pb.h)