diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/yson | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/yson')
-rw-r--r-- | library/cpp/json/yson/json2yson.cpp | 108 | ||||
-rw-r--r-- | library/cpp/json/yson/json2yson.h | 179 | ||||
-rw-r--r-- | library/cpp/json/yson/json2yson_ut.cpp | 107 | ||||
-rw-r--r-- | library/cpp/json/yson/ut/ya.make | 28 | ||||
-rw-r--r-- | library/cpp/json/yson/ya.make | 18 |
5 files changed, 440 insertions, 0 deletions
diff --git a/library/cpp/json/yson/json2yson.cpp b/library/cpp/json/yson/json2yson.cpp
new file mode 100644
index 0000000000..f72cb7a9ef
--- /dev/null
+++ b/library/cpp/json/yson/json2yson.cpp
@@ -0,0 +1,108 @@
+#include "json2yson.h"
+
+#include <library/cpp/yson/parser.h>
+#include <library/cpp/yson/json/json_writer.h>
+#include <library/cpp/yson/json/yson2json_adapter.h>
+
+namespace NJson2Yson {
+    static void WriteJsonValue(const NJson::TJsonValue& jsonValue, NYT::TYson2JsonCallbacksAdapter* adapter) { // Recursively replays a TJsonValue tree as callback events on the adapter.
+        switch (jsonValue.GetType()) {
+            default: // any type not listed below is emitted as null
+            case NJson::JSON_NULL:
+                adapter->OnNull();
+                break;
+            case NJson::JSON_BOOLEAN:
+                adapter->OnBoolean(jsonValue.GetBoolean());
+                break;
+            case NJson::JSON_DOUBLE:
+                adapter->OnDouble(jsonValue.GetDouble());
+                break;
+            case NJson::JSON_INTEGER:
+                adapter->OnInteger(jsonValue.GetInteger());
+                break;
+            case NJson::JSON_UINTEGER:
+                adapter->OnUInteger(jsonValue.GetUInteger());
+                break;
+            case NJson::JSON_STRING:
+                adapter->OnString(jsonValue.GetString());
+                break;
+            case NJson::JSON_ARRAY: {
+                adapter->OnOpenArray();
+                const NJson::TJsonValue::TArray& arr = jsonValue.GetArray();
+                for (const auto& it : arr)
+                    WriteJsonValue(it, adapter); // each element is emitted between the open/close array events
+                adapter->OnCloseArray();
+                break;
+            }
+            case NJson::JSON_MAP: {
+                adapter->OnOpenMap();
+                const NJson::TJsonValue::TMapType& map = jsonValue.GetMap();
+                for (const auto& it : map) {
+                    adapter->OnMapKey(it.first); // key event first, then the value subtree
+                    WriteJsonValue(it.second, adapter);
+                }
+                adapter->OnCloseMap();
+                break;
+            }
+        }
+    }
+
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, NYson::TYsonWriter* ysonWriter) { // Replays inputValue into ysonWriter through a TYson2JsonCallbacksAdapter.
+        NYT::TYson2JsonCallbacksAdapter adapter(ysonWriter);
+        WriteJsonValue(inputValue, &adapter);
+    }
+
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, IOutputStream* outputStream) { // Writes inputValue to outputStream with a TYsonWriter configured for EYsonFormat::Binary.
+        NYson::TYsonWriter ysonWriter(outputStream, NYson::EYsonFormat::Binary, ::NYson::EYsonType::Node, false);
+        SerializeJsonValueAsYson(inputValue, &ysonWriter);
+    }
+
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, TString& result) { // Writes inputValue into result through a TStringOutput.
+        TStringOutput resultStream(result);
+        SerializeJsonValueAsYson(inputValue, &resultStream);
+    }
+
+    TString SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue) { // Convenience overload: returns the serialized data as a new TString.
+        TString result;
+        SerializeJsonValueAsYson(inputValue, result);
+        return result;
+    }
+
+    bool DeserializeYsonAsJsonValue(IInputStream* inputStream, NJson::TJsonValue* outputValue, bool throwOnError) { // Parses YSON from inputStream into *outputValue; returns true on success.
+        NJson::TParserCallbacks parser(*outputValue);
+        NJson2Yson::TJsonBuilder consumer(&parser);
+        NYson::TYsonParser ysonParser(&consumer, inputStream, ::NYson::EYsonType::Node);
+        try {
+            ysonParser.Parse();
+        } catch (...) { // any exception raised while parsing lands here
+            if (throwOnError) {
+                throw; // caller asked for the original exception
+            }
+            return false; // otherwise the failure is reported only through the return value
+        }
+        return true;
+    }
+
+    bool DeserializeYsonAsJsonValue(TStringBuf str, NJson::TJsonValue* outputValue, bool throwOnError) { // Same as the stream overload, reading from an in-memory buffer.
+        TMemoryInput inputStream(str);
+        return DeserializeYsonAsJsonValue(&inputStream, outputValue, throwOnError);
+    }
+
+    void ConvertYson2Json(IInputStream* inputStream, IOutputStream* outputStream) { // Streams YSON from inputStream to outputStream as JSON via NYT::TJsonWriter; parser exceptions propagate.
+        NYT::TJsonWriter writer(outputStream, ::NYson::EYsonType::Node, NYT::JF_TEXT, NYT::JAM_ON_DEMAND, NYT::SBF_BOOLEAN);
+        NYson::TYsonParser ysonParser(&writer, inputStream, ::NYson::EYsonType::Node);
+        ysonParser.Parse();
+    }
+
+    void ConvertYson2Json(TStringBuf yson, IOutputStream* outputStream) { // Overload reading the YSON from an in-memory buffer.
+        TMemoryInput inputStream(yson);
+        ConvertYson2Json(&inputStream, outputStream);
+    }
+
+    TString ConvertYson2Json(TStringBuf yson) { // Overload returning the JSON as a new TString.
+        TString json;
+        TStringOutput outputStream(json);
+        ConvertYson2Json(yson, &outputStream);
+        return json;
+    }
+}
diff --git a/library/cpp/json/yson/json2yson.h b/library/cpp/json/yson/json2yson.h
new file mode 100644
index 0000000000..758eb6d0cf
--- /dev/null
+++ b/library/cpp/json/yson/json2yson.h
@@ -0,0 +1,179 @@
+#pragma once
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/yson/writer.h>
+
+namespace NJson2Yson {
+    class TJsonBuilderImpl: public NYson::TYsonConsumerBase { // YSON consumer that forwards each event to the matching NJson::TJsonCallbacks method.
+    public:
+        TJsonBuilderImpl(NJson::TJsonCallbacks* parserCallbacks) // stored as a raw pointer; NOTE(review): presumably must outlive this object - confirm
+            : ParserCallbacks_(parserCallbacks)
+        {
+        }
+
+        void OnStringScalar(TStringBuf value) override {
+            ParserCallbacks_->OnString(value);
+        }
+
+        void OnInt64Scalar(i64 value) override {
+            ParserCallbacks_->OnInteger(value);
+        }
+
+        void OnUint64Scalar(ui64 value) override {
+            ParserCallbacks_->OnUInteger(value);
+        }
+
+        void OnDoubleScalar(double value) override {
+            ParserCallbacks_->OnDouble(value);
+        }
+
+        void OnBooleanScalar(bool value) override {
+            ParserCallbacks_->OnBoolean(value);
+        }
+
+        void OnEntity() override { // a YSON entity is reported as a JSON null
+            ParserCallbacks_->OnNull();
+        }
+
+        void OnBeginList() override {
+            ParserCallbacks_->OnOpenArray();
+        }
+
+        void OnListItem() override { // intentionally empty: no JSON callback is issued per list item
+        }
+
+        void OnEndList() override {
+            ParserCallbacks_->OnCloseArray();
+        }
+
+        void OnBeginMap() override {
+            ParserCallbacks_->OnOpenMap();
+        }
+
+        void OnKeyedItem(TStringBuf key) override {
+            ParserCallbacks_->OnMapKey(key);
+        }
+
+        void OnEndMap() override {
+            ParserCallbacks_->OnCloseMap();
+        }
+
+        void OnBeginAttributes() override { // intentionally empty: this class does nothing on attribute boundaries
+        }
+
+        void OnEndAttributes() override { // intentionally empty
+        }
+
+    private:
+        NJson::TJsonCallbacks* ParserCallbacks_;
+    };
+
+    template <typename TBase>
+    class TSkipAttributesProxy: public TBase { // Wraps consumer TBase and drops every event received between OnBeginAttributes and OnEndAttributes.
+    public:
+        template <typename... TArgs>
+        TSkipAttributesProxy(TArgs&&... args) // injected class name: a template-id is not a valid constructor name in C++20
+            : TBase(std::forward<TArgs>(args)...)
+        {
+        }
+
+        void OnStringScalar(TStringBuf value) override {
+            if (AttributesDepth == 0) { // forward only outside attributes; every event handler below uses the same guard
+                TBase::OnStringScalar(value);
+            }
+        }
+
+        void OnInt64Scalar(i64 value) override {
+            if (AttributesDepth == 0) {
+                TBase::OnInt64Scalar(value);
+            }
+        }
+
+        void OnUint64Scalar(ui64 value) override {
+            if (AttributesDepth == 0) {
+                TBase::OnUint64Scalar(value);
+            }
+        }
+
+        void OnDoubleScalar(double value) override {
+            if (AttributesDepth == 0) {
+                TBase::OnDoubleScalar(value);
+            }
+        }
+
+        void OnBooleanScalar(bool value) override {
+            if (AttributesDepth == 0) {
+                TBase::OnBooleanScalar(value);
+            }
+        }
+
+        void OnEntity() override {
+            if (AttributesDepth == 0) {
+                TBase::OnEntity();
+            }
+        }
+
+        void OnBeginList() override {
+            if (AttributesDepth == 0) {
+                TBase::OnBeginList();
+            }
+        }
+
+        void OnListItem() override {
+            if (AttributesDepth == 0) {
+                TBase::OnListItem();
+            }
+        }
+
+        void OnEndList() override {
+            if (AttributesDepth == 0) {
+                TBase::OnEndList();
+            }
+        }
+
+        void OnBeginMap() override {
+            if (AttributesDepth == 0) {
+                TBase::OnBeginMap();
+            }
+        }
+
+        void OnKeyedItem(TStringBuf key) override {
+            if (AttributesDepth == 0) {
+                TBase::OnKeyedItem(key);
+            }
+        }
+
+        void OnEndMap() override {
+            if (AttributesDepth == 0) {
+                TBase::OnEndMap();
+            }
+        }
+
+        void OnBeginAttributes() override { // not forwarded to TBase; only tracks nesting
+            ++AttributesDepth;
+        }
+
+        void OnEndAttributes() override { // not forwarded to TBase; only tracks nesting
+            --AttributesDepth;
+            Y_ASSERT(AttributesDepth >= 0); // a negative depth means more ends than begins were received
+        }
+
+    private:
+        int AttributesDepth = 0; // number of attribute sections currently open
+    };
+
+    using TJsonBuilder = TSkipAttributesProxy<TJsonBuilderImpl>; // JSON-building consumer that ignores YSON attributes
+
+    void ConvertYson2Json(IInputStream* inputStream, IOutputStream* outputStream); // YSON stream -> JSON stream
+    void ConvertYson2Json(TStringBuf yson, IOutputStream* outputStream); // YSON buffer -> JSON stream
+    TString ConvertYson2Json(TStringBuf yson); // YSON buffer -> JSON string
+
+    bool DeserializeYsonAsJsonValue(IInputStream* inputStream, NJson::TJsonValue* outputValue, bool throwOnError = false); // returns false on failure, or rethrows when throwOnError is set
+    bool DeserializeYsonAsJsonValue(TStringBuf str, NJson::TJsonValue* outputValue, bool throwOnError = false); // same, reading from a buffer
+
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, NYson::TYsonWriter* ysonWriter); // writes through a caller-supplied YSON writer
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, IOutputStream* outputStream); // writes binary-format YSON to a stream
+    void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, TString& result); // writes binary-format YSON into result
+    TString SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue); // returns binary-format YSON as a string
+}
diff --git a/library/cpp/json/yson/json2yson_ut.cpp b/library/cpp/json/yson/json2yson_ut.cpp
new file mode 100644
index 0000000000..9eb23354cf
--- /dev/null
+++ b/library/cpp/json/yson/json2yson_ut.cpp
@@ -0,0 +1,107 @@
+#include "library/cpp/json/yson/json2yson.h"
+
+#include <library/cpp/blockcodecs/codecs.h>
+#include <library/cpp/histogram/simple/histogram.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/testing/unittest/tests_data.h>
+
+#include <util/datetime/cputimer.h>
+#include <util/stream/file.h>
+
+template <typename TCallBack>
+ui64 Run(TCallBack&& callBack) { // Invokes callBack once and returns the time measured by TSimpleTimer, in microseconds.
+    TSimpleTimer timer;
+    callBack();
+    return timer.Get().MicroSeconds();
+}
+
+static TString GetRequestsWithDecoding(const TString& inputPath, const NBlockCodecs::ICodec* codec) { // Reads the whole file at inputPath and returns its contents decoded with codec.
+    TIFStream inputFileStream(inputPath);
+    TString encodedRequests = inputFileStream.ReadAll();
+    TString requests;
+    codec->Decode(encodedRequests, requests);
+    return requests;
+}
+
+Y_UNIT_TEST_SUITE(Json2Yson) {
+    Y_UNIT_TEST(NOAPACHE_REQUESTS) { // Round-trips every sample request through JSON and through YSON, checking equality and recording timings.
+        const ui32 warmUpRetries = 5; // each measurement keeps the minimum over this many runs
+        const TVector<double> percentiles = {0.25, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 0.99, 1.0};
+
+        NSimpleHistogram::TMultiHistogramCalcer<ui64> calcer;
+
+        TString requests = GetRequestsWithDecoding(GetWorkPath() + "/noapache_requests_sample_lz4", NBlockCodecs::Codec("lz4"));
+        TStringInput inputStream(requests);
+
+        for (TString jsonRequest, jsonString, ysonString; inputStream.ReadLine(jsonRequest);) { // one JSON request per input line
+            TStringInput jsonInput(jsonRequest);
+            NJson::TJsonValue readedJson;
+            NJson::ReadJsonTree(&jsonInput, &readedJson, true);
+            jsonRequest.clear();
+
+            ui64 writeTime = Max<ui64>(); // start at the maximum so Min() below keeps the fastest run
+            ui64 readTime = Max<ui64>();
+
+            for (ui32 i = 0; i < warmUpRetries; ++i) { // JSON -> text -> JSON round trip
+                NJson::TJsonValue Json2Json;
+                TStringOutput jsonWriteOutput(jsonString);
+                NJsonWriter::TBuf jsonBuf(NJsonWriter::HEM_UNSAFE, &jsonWriteOutput);
+
+                writeTime = Min(writeTime, Run([&]() {
+                    jsonBuf.WriteJsonValue(&readedJson);
+                }));
+
+                TStringInput jsonInput(jsonString);
+                NJson::TJsonReaderConfig config;
+                config.DontValidateUtf8 = true;
+                readTime = Min(readTime, Run([&]() {
+                    NJson::ReadJsonTree(&jsonInput, &config, &Json2Json, true);
+                }));
+
+                UNIT_ASSERT_VALUES_EQUAL(
+                    NJsonWriter::TBuf().WriteJsonValue(&readedJson, true).Str(),
+                    NJsonWriter::TBuf().WriteJsonValue(&Json2Json, true).Str());
+
+                jsonString.clear(); // reset the buffer for the next iteration
+            }
+
+            calcer.RecordValue("read_json", readTime);
+            calcer.RecordValue("write_json", writeTime);
+            calcer.RecordValue("read_and_write_json", readTime + writeTime);
+
+            writeTime = Max<ui64>();
+            readTime = Max<ui64>();
+
+            for (ui32 i = 0; i < warmUpRetries; ++i) { // JSON -> YSON -> JSON round trip
+                NJson::TJsonValue convertedJson;
+                TStringOutput ysonOutput(ysonString);
+
+                writeTime = Min(writeTime, Run([&]() {
+                    NJson2Yson::SerializeJsonValueAsYson(readedJson, &ysonOutput);
+                }));
+
+                TStringInput ysonInput(ysonString);
+                readTime = Min(readTime, Run([&]() {
+                    NJson2Yson::DeserializeYsonAsJsonValue(&ysonInput, &convertedJson, true); // throwOnError: a parse failure fails the test with the parser's own exception
+                }));
+
+                UNIT_ASSERT_VALUES_EQUAL(
+                    NJsonWriter::TBuf().WriteJsonValue(&convertedJson, true).Str(),
+                    NJsonWriter::TBuf().WriteJsonValue(&readedJson, true).Str());
+
+                ysonString.clear(); // reset the buffer for the next iteration
+            }
+
+            calcer.RecordValue("read_yson", readTime);
+            calcer.RecordValue("write_yson", writeTime);
+            calcer.RecordValue("read_and_write_yson", readTime + writeTime);
+        }
+
+        NJson::TJsonValue histogramJson = NSimpleHistogram::ToJson(calcer.Calc(), percentiles);
+        for (const auto& it : histogramJson.GetMap()) {
+            for (const auto& percentileValue : it.second.GetMap()) {
+                UNIT_ADD_METRIC(it.first + "_" + percentileValue.first, percentileValue.second.GetUInteger() / 1000.0); // recorded values are microseconds; dividing by 1000 reports milliseconds
+            }
+        }
+    }
+}
diff --git a/library/cpp/json/yson/ut/ya.make b/library/cpp/json/yson/ut/ya.make
new file mode 100644
index 0000000000..4ceb65b279
--- /dev/null
+++ b/library/cpp/json/yson/ut/ya.make
@@ -0,0 +1,28 @@
+OWNER(
+    avitella
+    elshiko
+)
+
+UNITTEST_FOR(library/cpp/json/yson)
+
+ALLOCATOR(LF)
+
+DATA(sbr://363537653) # NOTE(review): presumably the resource providing noapache_requests_sample_lz4 read by the test - confirm
+
+PEERDIR(
+    library/cpp/blockcodecs
+    library/cpp/histogram/simple
+    library/cpp/testing/unittest
+)
+
+SIZE(LARGE)
+
+TAG(ya:fat)
+
+TIMEOUT(600)
+
+SRCS(
+    json2yson_ut.cpp
+)
+
+END()
diff --git a/library/cpp/json/yson/ya.make b/library/cpp/json/yson/ya.make
new file mode 100644
index 0000000000..9b289d674f
--- /dev/null
+++ b/library/cpp/json/yson/ya.make
@@ -0,0 +1,18 @@
+LIBRARY()
+
+OWNER(
+    avitella
+    elshiko
+)
+
+PEERDIR(
+    library/cpp/json
+    library/cpp/yson
+    library/cpp/yson/json
+)
+
+SRCS(
+    json2yson.cpp
+)
+
+END()