diff options
author | vvvv <vvvv@yandex-team.com> | 2024-11-06 23:54:28 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 00:04:25 +0300 |
commit | cf2a23963ac10add28c50cc114fbf48953eca5aa (patch) | |
tree | 174b849b8ecfa96b0c8e4409ab3287721a9210c8 /yql/essentials/minikql | |
parent | 3a3113a2bf5a7fab32bde414932082b264c559fc (diff) | |
download | ydb-cf2a23963ac10add28c50cc114fbf48953eca5aa.tar.gz |
Prepare move yql/minikql YQL-19206
types,jsonpath,dom
commit_hash:6b54be5968b6a30b6d97fe3a1611574bcefc749e
Diffstat (limited to 'yql/essentials/minikql')
58 files changed, 13123 insertions, 0 deletions
diff --git a/yql/essentials/minikql/dom/convert.h b/yql/essentials/minikql/dom/convert.h new file mode 100644 index 0000000000..e562d0381f --- /dev/null +++ b/yql/essentials/minikql/dom/convert.h @@ -0,0 +1,388 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/utils/utf8.h> + +#include <util/string/escape.h> +#include <util/string/cast.h> +#include <util/string/builder.h> + +#include <functional> + +namespace NYql::NDom { + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod ConvertToBool(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::Bool: + return TUnboxedValuePod(x.Get<bool>()); + case ENodeType::String: + if (const std::string_view str = x.AsStringRef(); str == "true") + return TUnboxedValuePod(true); + else if (str == "false") + return TUnboxedValuePod(false); + else if constexpr (AutoConvert) + return TUnboxedValuePod(x.AsStringRef().Size() > 0U); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Uint64: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.Get<ui64>() != 0ULL); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Int64: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.Get<i64>() != 0LL); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Double: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.Get<double>() != 0.); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Entity: + if constexpr (AutoConvert) + return TUnboxedValuePod(false); + else if constexpr (Strict) + break; + else if constexpr (AutoConvert) + return TUnboxedValuePod(false); + else + return {}; + case ENodeType::List: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.IsBoxed() && x.HasListItems()); + else if constexpr (Strict) + break; + else + return {}; + case 
ENodeType::Dict: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.IsBoxed() && x.HasDictItems()); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Attr: + return ConvertToBool<Strict, AutoConvert>(x.GetVariantItem().Release(), valueBuilder, pos); + } + + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse boolean value from " << TDebugPrinter(x)).c_str()); +} + +template<typename TDst, typename TSrc> +constexpr inline bool InBounds(const TSrc v) { + if constexpr (std::is_same<TSrc, TDst>()) + return true; + if constexpr (sizeof(TSrc) > sizeof(TDst)) + if constexpr (std::is_signed<TSrc>()) + return v <= TSrc(std::numeric_limits<TDst>::max()) && v >= TSrc(std::numeric_limits<TDst>::min()); + else + return v <= TSrc(std::numeric_limits<TDst>::max()); + else + if constexpr (std::is_signed<TSrc>()) + return v >= TSrc(std::numeric_limits<TDst>::min()); + else + return v <= TSrc(std::numeric_limits<TDst>::max()); + static_assert(sizeof(TSrc) >= sizeof(TDst), "Expects wide to short."); +} + +template<bool Strict, bool AutoConvert, typename TargetType> +TUnboxedValuePod ConvertToIntegral(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::Int64: { + const auto s = x.Get<i64>(); + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(s)); + else if (InBounds<TargetType>(s)) + return TUnboxedValuePod(TargetType(s)); + else if constexpr (Strict) + break; + else + return {}; + } + case ENodeType::Uint64: { + const auto u = x.Get<ui64>(); + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(u)); + else if (InBounds<TargetType>(u)) + return TUnboxedValuePod(TargetType(u)); + else if constexpr (Strict) + break; + else + return {}; + } + case ENodeType::Bool: + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(x.Get<bool>() ? 
1 : 0)); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Double: + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(x.Get<double>())); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::String: + if constexpr (AutoConvert) + return TUnboxedValuePod(FromStringWithDefault(std::string_view(x.AsStringRef()), TargetType(0))); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Entity: + if constexpr (AutoConvert) + return TUnboxedValuePod::Zero(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::List: + if constexpr (AutoConvert) + return TUnboxedValuePod::Zero(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Dict: + if constexpr (AutoConvert) + return TUnboxedValuePod::Zero(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Attr: + return ConvertToIntegral<Strict, AutoConvert, TargetType>(x.GetVariantItem().Release(), valueBuilder, pos); + } + + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse integer value from " << TDebugPrinter(x)).c_str()); + static_assert(std::is_integral<TargetType>(), "Expect integral."); +} + +template<bool Strict, bool AutoConvert, typename TargetType> +TUnboxedValuePod ConvertToFloat(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::Double: + return TUnboxedValuePod(TargetType(x.Get<double>())); + case ENodeType::Uint64: + return TUnboxedValuePod(TargetType(x.Get<ui64>())); + case ENodeType::Int64: + return TUnboxedValuePod(TargetType(x.Get<i64>())); + case ENodeType::Bool: + if constexpr (AutoConvert) + return TUnboxedValuePod(x.Get<bool>() ? 
TargetType(1) : TargetType(0)); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::String: + if constexpr (AutoConvert) + return TUnboxedValuePod(FromStringWithDefault(std::string_view(x.AsStringRef()), TargetType(0))); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Entity: + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(0)); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::List: + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(0)); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Dict: + if constexpr (AutoConvert) + return TUnboxedValuePod(TargetType(0)); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Attr: + return ConvertToFloat<Strict, AutoConvert, TargetType>(x.GetVariantItem().Release(), valueBuilder, pos); + } + + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse floating point value from " << TDebugPrinter(x)).c_str()); + static_assert(std::is_floating_point<TargetType>(), "Expect float."); +} + +template<bool Strict, bool AutoConvert, bool Utf8> +TUnboxedValuePod ConvertToString(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::String: + if constexpr (Utf8) + if (IsUtf8(x.AsStringRef())) + return x; + else + if (AutoConvert) + return valueBuilder->NewString(EscapeC(TStringBuf(x.AsStringRef()))).Release(); + else if constexpr (Strict) + break; + else + return {}; + else + return x; + case ENodeType::Uint64: + if constexpr (AutoConvert) + return valueBuilder->NewString(ToString(x.Get<ui64>())).Release(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Int64: + if constexpr (AutoConvert) + return valueBuilder->NewString(ToString(x.Get<i64>())).Release(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Bool: + if 
constexpr (AutoConvert) + return x.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Double: + if constexpr (AutoConvert) + return valueBuilder->NewString(::FloatToString(x.Get<double>())).Release(); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Entity: + case ENodeType::List: + case ENodeType::Dict: + if constexpr (AutoConvert) + return TUnboxedValuePod::Embedded(""); + else if constexpr (Strict) + break; + else + return {}; + case ENodeType::Attr: + return ConvertToString<Strict, AutoConvert, Utf8>(x.GetVariantItem().Release(), valueBuilder, pos); + } + + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse string value from " << TDebugPrinter(x)).c_str()); +} + +class TLazyConveter : public TManagedBoxedValue { +public: + using TConverter = std::function<TUnboxedValuePod(TUnboxedValuePod)>; + + TLazyConveter(TUnboxedValue&& original, TConverter&& converter) + : Original(std::move(original)), Converter(std::move(converter)) + {} +private: + template <bool NoSwap> + class TIterator: public TManagedBoxedValue { + public: + TIterator(TUnboxedValue&& original, const TConverter& converter) + : Original(std::move(original)), Converter(converter) + {} + + private: + bool Skip() final { + return Original.Skip(); + } + + bool Next(TUnboxedValue& value) final { + if (Original.Next(value)) { + if constexpr (!NoSwap) { + value = Converter(value.Release()); + } + return true; + } + return false; + } + + bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final { + if (Original.NextPair(key, payload)) { + if constexpr (NoSwap) { + payload = Converter(payload.Release()); + } else { + key = Converter(key.Release()); + } + return true; + } + return false; + } + + const TUnboxedValue Original; + const TConverter Converter; + }; + + ui64 GetDictLength() const final { + return 
Original.GetDictLength(); + } + + ui64 GetListLength() const final { + return Original.GetListLength(); + } + + bool HasFastListLength() const final { + return Original.HasFastListLength(); + } + + bool HasDictItems() const final { + return Original.HasDictItems(); + } + + bool HasListItems() const final { + return Original.HasListItems(); + } + + TUnboxedValue GetListIterator() const final { + return TUnboxedValuePod(new TIterator<false>(Original.GetListIterator(), Converter)); + } + + TUnboxedValue GetDictIterator() const final { + return TUnboxedValuePod(new TIterator<true>(Original.GetDictIterator(), Converter)); + } + + TUnboxedValue GetKeysIterator() const final { + return TUnboxedValuePod(new TIterator<true>(Original.GetKeysIterator(), Converter)); + } + + TUnboxedValue GetPayloadsIterator() const { + return TUnboxedValuePod(new TIterator<false>(Original.GetPayloadsIterator(), Converter)); + } + + bool Contains(const TUnboxedValuePod& key) const final { + return Original.Contains(key); + } + + TUnboxedValue Lookup(const TUnboxedValuePod& key) const final { + if (auto lookup = Original.Lookup(key)) { + return Converter(lookup.Release().GetOptionalValue()).MakeOptional(); + } + return {}; + } + + bool IsSortedDict() const final { + return Original.IsSortedDict(); + } + +private: + const TUnboxedValue Original; + const TConverter Converter; +}; + +} diff --git a/yql/essentials/minikql/dom/hash.cpp b/yql/essentials/minikql/dom/hash.cpp new file mode 100644 index 0000000000..ba4d8d0146 --- /dev/null +++ b/yql/essentials/minikql/dom/hash.cpp @@ -0,0 +1,151 @@ +#include "node.h" +#include "hash.h" + +#include <yql/essentials/public/udf/udf_type_ops.h> + +namespace NYql::NDom { + +using namespace NUdf; + +namespace { + +THashType HashList(const NUdf::TUnboxedValuePod x) { + THashType hash = 0ULL; + if (x.IsBoxed()) { + if (const auto elements = x.GetElements()) { + const auto size = x.GetListLength(); + for (ui32 i = 0U; i < size; ++i) { + hash = CombineHashes(hash, 
HashDom(elements[i])); + } + } else { + const auto it = x.GetListIterator(); + for (TUnboxedValue v; it.Next(v); hash = CombineHashes(hash, HashDom(v))) + continue; + } + } + return hash; +} + +THashType HashDict(const NUdf::TUnboxedValuePod x) { + THashType hash = 0ULL; + if (x.IsBoxed()) { + const auto it = x.GetDictIterator(); + for (TUnboxedValue k, v; it.NextPair(k, v);) { + hash = CombineHashes(hash, CombineHashes(GetStringHash(k), HashDom(v))); + } + } + return hash; +} + +bool EquateLists(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) { + if (x.IsBoxed() && y.IsBoxed()) { + const auto ex = x.GetElements(); + const auto ey = y.GetElements(); + if (ex && ey) { + const auto size = x.GetListLength(); + if (size != y.GetListLength()) { + return false; + } + for (ui32 i = 0U; i < size; ++i) { + if (!EquateDoms(ex[i], ey[i])) + return false; + } + } else { + const auto itx = x.GetListIterator(); + const auto ity = y.GetListIterator(); + for (TUnboxedValue vx, vy; itx.Next(vx);) { + if (!ity.Next(vy)) + return false; + if (!EquateDoms(vx, vy)) + return false; + } + } + return true; + } + return x.IsBoxed() == y.IsBoxed(); +} + +bool EquateDicts(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) { + if (x.IsBoxed() && y.IsBoxed()) { + const auto size = x.GetDictLength(); + if (size != y.GetDictLength()) { + return false; + } + + const auto xr = static_cast<const TPair*>(x.GetResource()); + const auto yr = static_cast<const TPair*>(y.GetResource()); + // clone dict as attrnode + if (xr && yr) { + for (ui32 i = 0U; i < size; ++i) { + if (!EquateStrings(xr[i].first, yr[i].first)) + return false; + if (!EquateDoms(xr[i].second, yr[i].second)) + return false; + } + } else { + const auto it = x.GetDictIterator(); + for (TUnboxedValue k, v; it.NextPair(k, v);) { + if (auto l = y.Lookup(k)) + if (EquateDoms(v, l.GetOptionalValue())) + continue; + return false; + } + + } + return true; + } + return x.IsBoxed() == y.IsBoxed(); +} + +} + 
+THashType HashDom(const NUdf::TUnboxedValuePod x) { + switch (const auto type = GetNodeType(x); type) { + case ENodeType::Double: + return CombineHashes(THashType(type), GetFloatHash<double>(x)); + case ENodeType::Uint64: + return CombineHashes(THashType(type), GetIntegerHash<ui64>(x)); + case ENodeType::Int64: + return CombineHashes(THashType(type), GetIntegerHash<i64>(x)); + case ENodeType::Bool: + return CombineHashes(THashType(type), std::hash<bool>()(x.Get<bool>())); + case ENodeType::String: + return CombineHashes(THashType(type), GetStringHash(x)); + case ENodeType::Entity: + return CombineHashes(THashType(type), THashType(~0ULL)); + case ENodeType::List: + return CombineHashes(THashType(type), HashList(x)); + case ENodeType::Dict: + return CombineHashes(THashType(type), HashDict(x)); + case ENodeType::Attr: + return CombineHashes(THashType(type), CombineHashes(HashDict(x), HashDom(x.GetVariantItem().Release()))); + } +} + +bool EquateDoms(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) { + if (const auto type = GetNodeType(x); type == GetNodeType(y)) { + switch (type) { + case ENodeType::Double: + return EquateFloats<double>(x, y); + case ENodeType::Uint64: + return EquateIntegers<ui64>(x, y); + case ENodeType::Int64: + return EquateIntegers<i64>(x, y); + case ENodeType::Bool: + return x.Get<bool>() == y.Get<bool>(); + case ENodeType::String: + return EquateStrings(x, y); + case ENodeType::Entity: + return true; + case ENodeType::List: + return EquateLists(x, y); + case ENodeType::Dict: + return EquateDicts(x, y); + case ENodeType::Attr: + return EquateDicts(x, y) && EquateDoms(x.GetVariantItem().Release(), y.GetVariantItem().Release()); + } + } + return false; +} + +} diff --git a/yql/essentials/minikql/dom/hash.h b/yql/essentials/minikql/dom/hash.h new file mode 100644 index 0000000000..870435ff4e --- /dev/null +++ b/yql/essentials/minikql/dom/hash.h @@ -0,0 +1,13 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_types.h> 
+#include <yql/essentials/public/udf/udf_type_ops.h> + +namespace NYql::NDom { + +NUdf::THashType HashDom(const NUdf::TUnboxedValuePod value); + +bool EquateDoms(const NUdf::TUnboxedValuePod lhs, const NUdf::TUnboxedValuePod rhs); + +} + diff --git a/yql/essentials/minikql/dom/json.cpp b/yql/essentials/minikql/dom/json.cpp new file mode 100644 index 0000000000..a29d044adf --- /dev/null +++ b/yql/essentials/minikql/dom/json.cpp @@ -0,0 +1,349 @@ +#include "json.h" +#include "node.h" + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_writer.h> + +#include <util/stream/input.h> +#include <util/stream/str.h> +#include <util/generic/stack.h> +#include <util/system/yassert.h> +#include <util/system/compiler.h> + +#include <cmath> +#include <ctype.h> + +namespace NYql::NDom { + +using namespace NUdf; +using namespace NJson; + +namespace { + +size_t AsciiSize(const TStringBuf& str) { + size_t s = 0U; + while (s < str.size() && isascii(str[s])) + ++s; + return s; +} + +TString EncodeUtf(const TStringBuf& str, size_t from) +{ + TString result(str.substr(0, from)); + while (from < str.size()) { + const auto c = str[from++]; + if (isascii(c)) { + result.append(c); + } else { + result.append((c >> '\x06') & '\x03' | '\xC0'); + result.append(c & '\x3F' | '\x80'); + } + } + + return result; +} + +TString DecodeUtf(const TStringBuf& str, size_t from) +{ + TString result(str); + auto i = from; + while (from < str.size()) { + const auto c = str[from++]; + if (isascii(c)) { + result[i++] = c; + } else if ((c & '\xFC') == '\xC0') { + result[i++] = ((c & '\x03') << '\x06') | (str[from++] & '\x3F'); + } else { + ythrow yexception() << "Unicode symbols with codes greater than 255 are not supported."; + } + } + result.resize(i); + return result; +} + +template<bool DecodeUtf8> +class TDomCallbacks : public TJsonCallbacks { +public: + TDomCallbacks(const IValueBuilder* valueBuilder, bool 
throwException) + : TJsonCallbacks(throwException) + , ValueBuilder(valueBuilder) + { + Result.push({}); + } + + bool OnNull() override { + return PushToCurrentCollection(MakeEntity()); + } + + bool OnBoolean(bool value) override { + return PushToCurrentCollection(MakeBool(value)); + } + + bool OnInteger(long long value) override { + return PushToCurrentCollection(MakeInt64(static_cast<i64>(value))); + } + + bool OnUInteger(unsigned long long value) override { + return PushToCurrentCollection(MakeUint64(static_cast<ui64>(value))); + } + + bool OnDouble(double value) override { + if (Y_UNLIKELY(std::isinf(value))) { + ythrow yexception() << "JSON number is infinite"; + } + + return PushToCurrentCollection(MakeDouble(value)); + } + + bool OnString(const TStringBuf& value) override { + if constexpr (DecodeUtf8) { + if (const auto from = AsciiSize(value); from < value.size()) { + return PushToCurrentCollection(MakeString(DecodeUtf(value, from), ValueBuilder)); + } + } + return PushToCurrentCollection(MakeString(value, ValueBuilder)); + } + + bool OnOpenMap() override { + return OnCollectionOpen(); + } + + bool OnMapKey(const TStringBuf& value) override { + return OnString(value); + } + + bool OnCloseMap() override { + Y_DEBUG_ABORT_UNLESS(!Result.empty()); + auto& items = Result.top(); + Y_DEBUG_ABORT_UNLESS(items.size() % 2 == 0); + + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs; + for (size_t i = 0; i < items.size(); i += 2) { + pairs.emplace_back(std::move(items[i]), std::move(items[i + 1])); + } + + Result.pop(); + return PushToCurrentCollection(MakeDict(pairs.data(), pairs.size())); + } + + bool OnOpenArray() override { + return OnCollectionOpen(); + } + + bool OnCloseArray() override { + Y_DEBUG_ABORT_UNLESS(!Result.empty()); + auto& items = Result.top(); + TUnboxedValue list = MakeList(items.data(), items.size(), ValueBuilder); + Result.pop(); + return PushToCurrentCollection(std::move(list)); + } + + bool OnEnd() override { + return IsResultSingle(); + } 
+ + TUnboxedValue GetResult() && { + Y_DEBUG_ABORT_UNLESS(IsResultSingle()); + return std::move(Result.top()[0]); + } + +private: + bool OnCollectionOpen() { + Result.emplace(); + return true; + } + + bool PushToCurrentCollection(TUnboxedValue&& value) { + Y_DEBUG_ABORT_UNLESS(!Result.empty()); + Result.top().emplace_back(std::move(value)); + return true; + } + + bool IsResultSingle() { + return Result.size() == 1 && Result.top().size() == 1; + } + + const IValueBuilder* ValueBuilder; + + using TUnboxedValues = TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>>; + std::stack<TUnboxedValues, TSmallVec<TUnboxedValues, TStdAllocatorForUdf<TUnboxedValues>>> Result; +}; + +class TTestCallbacks : public TJsonCallbacks { +public: + TTestCallbacks() + : TJsonCallbacks(false) + {} + + bool OnNull() final { return true; } + + bool OnBoolean(bool) final { return true; } + + bool OnInteger(long long) final { return true; } + + bool OnUInteger(unsigned long long) final { return true; } + + bool OnDouble(double value) final { return !std::isinf(value); } + + bool OnString(const TStringBuf&) final { return true; } + + bool OnOpenMap() final { return true; } + + bool OnMapKey(const TStringBuf&) final { return true; } + + bool OnCloseMap() final { return true; } + + bool OnOpenArray() final { return true; } + + bool OnCloseArray() final { return true; } + + bool OnEnd() final { + if (HasResult) + return false; + + return HasResult = true; + } + + private: + bool HasResult = false; +}; + +bool IsEntity(const TUnboxedValuePod value) { + switch (GetNodeType(value)) { + case ENodeType::Entity: return true; + case ENodeType::Attr: return IsEntity(value.GetVariantItem().Release()); + default: return false; + } +} + +template<bool SkipMapEntity, bool EncodeUtf8> +void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer); + +template<bool SkipMapEntity, bool EncodeUtf8> +void WriteArray(const TUnboxedValuePod value, TJsonWriter& writer) { + writer.OpenArray(); + if 
(value.IsBoxed()) { + if (const auto elements = value.GetElements()) { + const auto size = value.GetListLength(); + for (ui64 i = 0; i < size; ++i) { + WriteValue<SkipMapEntity, EncodeUtf8>(elements[i], writer); + } + } else { + const auto it = value.GetListIterator(); + for (TUnboxedValue v; it.Next(v); WriteValue<SkipMapEntity, EncodeUtf8>(v, writer)) + continue; + } + } + writer.CloseArray(); +} + +template<bool SkipMapEntity, bool EncodeUtf8> +void WriteMap(const TUnboxedValuePod value, TJsonWriter& writer) { + writer.OpenMap(); + if (value.IsBoxed()) { + TUnboxedValue key, payload; + for (const auto it = value.GetDictIterator(); it.NextPair(key, payload);) { + if constexpr (SkipMapEntity) + if (IsEntity(payload)) + continue; + const TStringBuf str = key.AsStringRef(); + if constexpr (EncodeUtf8) + if (const auto from = AsciiSize(str); from < str.size()) + writer.WriteKey(EncodeUtf(str, from)); + else + writer.WriteKey(str); + else + writer.WriteKey(str); + WriteValue<SkipMapEntity, EncodeUtf8>(payload, writer); + } + } + writer.CloseMap(); +} + +template<bool SkipMapEntity, bool EncodeUtf8> +void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer) { + switch (GetNodeType(value)) { + case ENodeType::String: { + const TStringBuf str = value.AsStringRef(); + if constexpr (EncodeUtf8) { + if (const auto from = AsciiSize(str); from < str.size()) { + return writer.Write(EncodeUtf(str, from)); + } + } + return writer.Write(str); + } + case ENodeType::Bool: + return writer.Write(value.Get<bool>()); + case ENodeType::Int64: + return writer.Write(value.Get<i64>()); + case ENodeType::Uint64: + return writer.Write(value.Get<ui64>()); + case ENodeType::Double: + return writer.Write(value.Get<double>()); + case ENodeType::Entity: + return writer.WriteNull(); + case ENodeType::List: + return WriteArray<SkipMapEntity, EncodeUtf8>(value, writer); + case ENodeType::Dict: + return WriteMap<SkipMapEntity, EncodeUtf8>(value, writer); + case ENodeType::Attr: + 
writer.OpenMap(); + writer.WriteKey("$attributes"); + WriteMap<SkipMapEntity, EncodeUtf8>(value, writer); + writer.WriteKey("$value"); + WriteValue<SkipMapEntity, EncodeUtf8>(value.GetVariantItem().Release(), writer); + writer.CloseMap(); + } +} + +} + +bool IsValidJson(const TStringBuf json) { + TMemoryInput input(json.data(), json.size()); + TTestCallbacks callbacks; + return ReadJson(&input, &callbacks); +} + +TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueBuilder, bool dencodeUtf8) { + TMemoryInput input(json.data(), json.size()); + if (dencodeUtf8) { + TDomCallbacks<true> callbacks(valueBuilder, /* throwException */ true); + if (!ReadJson(&input, &callbacks)) { + UdfTerminate("Internal error: parser error occurred but corresponding callback was not called"); + } + return std::move(callbacks).GetResult(); + } else { + TDomCallbacks<false> callbacks(valueBuilder, /* throwException */ true); + if (!ReadJson(&input, &callbacks)) { + UdfTerminate("Internal error: parser error occurred but corresponding callback was not called"); + } + return std::move(callbacks).GetResult(); + } +} + +TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8, bool writeNanAsString) { + TStringStream output; + TJsonWriterConfig config; + + config.SetFormatOutput(false); + config.WriteNanAsString = writeNanAsString; + + config.FloatToStringMode = EFloatToStringMode::PREC_AUTO; + TJsonWriter writer(&output, config); + if (skipMapEntity) + if (encodeUtf8) + WriteValue<true, true>(dom, writer); + else + WriteValue<true, false>(dom, writer); + else + if (encodeUtf8) + WriteValue<false, true>(dom, writer); + else + WriteValue<false, false>(dom, writer); + writer.Flush(); + return output.Str(); +} + +} diff --git a/yql/essentials/minikql/dom/json.h b/yql/essentials/minikql/dom/json.h new file mode 100644 index 0000000000..ea95807de8 --- /dev/null +++ b/yql/essentials/minikql/dom/json.h @@ -0,0 +1,14 @@ +#pragma once + 
+#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +namespace NYql::NDom { + +bool IsValidJson(const TStringBuf json); + +NUdf::TUnboxedValue TryParseJsonDom(const TStringBuf json, const NUdf::IValueBuilder* valueBuilder, bool decodeUtf8 = false); + +TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity = false, bool encodeUtf8 = false, bool writeNanAsString = false); + +} diff --git a/yql/essentials/minikql/dom/make.cpp b/yql/essentials/minikql/dom/make.cpp new file mode 100644 index 0000000000..ca6864f759 --- /dev/null +++ b/yql/essentials/minikql/dom/make.cpp @@ -0,0 +1,170 @@ +#include "make.h" +#include "node.h" +#include "yson.h" +#include "json.h" + +#include <yql/essentials/public/udf/udf_type_inspection.h> + +#include <util/string/builder.h> + +namespace NYql::NDom { +using namespace NUdf; + +namespace { + +TUnboxedValuePod MakeData(const TDataTypeId nodeType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + switch (nodeType) { + case TDataType<char*>::Id: return value; + case TDataType<TUtf8>::Id: return value; + case TDataType<bool>::Id: return SetNodeType<ENodeType::Bool>(value); + case TDataType<i8>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i8>()))); + case TDataType<i16>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i16>()))); + case TDataType<i32>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i32>()))); + case TDataType<i64>::Id: return SetNodeType<ENodeType::Int64>(value); + case TDataType<ui8>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui8>()))); + case TDataType<ui16>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui16>()))); + case TDataType<ui32>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui32>()))); + case TDataType<ui64>::Id: return 
SetNodeType<ENodeType::Uint64>(value); + case TDataType<float>::Id: return SetNodeType<ENodeType::Double>(TUnboxedValuePod(double(value.Get<float>()))); + case TDataType<double>::Id: return SetNodeType<ENodeType::Double>(value); + case TDataType<TYson>::Id: return TryParseYsonDom(value.AsStringRef(), valueBuilder).Release(); + case TDataType<TJson>::Id: return TryParseJsonDom(value.AsStringRef(), valueBuilder).Release(); + default: break; + } + + Y_ABORT("Unsupported data type."); +} + +TUnboxedValuePod MakeList(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + if (const auto elements = value.GetElements()) { + if (const auto size = value.GetListLength()) { + TUnboxedValue* items = nullptr; + auto res = valueBuilder->NewArray(size, items); + for (ui64 i = 0ULL; i < size; ++i) { + *items++ = MakeDom(typeHelper, itemType, elements[i], valueBuilder); + } + return SetNodeType<ENodeType::List>(res.Release()); + } + } else { + TSmallVec<TUnboxedValue> items; + if (value.HasFastListLength()) { + items.reserve(value.GetListLength()); + } + const auto iterator = value.GetListIterator(); + for (TUnboxedValue current; iterator.Next(current);) { + items.emplace_back(MakeDom(typeHelper, itemType, current, valueBuilder)); + } + if (!items.empty()) { + auto res = valueBuilder->NewList(items.data(), items.size()); + return SetNodeType<ENodeType::List>(res.Release()); + } + } + + return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void()); +} + +TUnboxedValuePod MakeDict(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + items.reserve(value.GetDictLength()); + const auto it = value.GetDictIterator(); + for (TUnboxedValue x, y; it.NextPair(x, y);) { + items.emplace_back(x, MakeDom(typeHelper, itemType, y, valueBuilder)); + } + + if (items.empty()) { + return 
SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void()); + } + + return SetNodeType<ENodeType::Dict>(TUnboxedValuePod(new TMapNode(items.data(), items.size()))); +} + +TUnboxedValuePod MakeTuple(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); const auto size = tupleTypeInspector.GetElementsCount()) { + TUnboxedValue* items = nullptr; + auto res = valueBuilder->NewArray(size, items); + for (ui64 i = 0ULL; i < size; ++i) { + *items++ = MakeDom(typeHelper, tupleTypeInspector.GetElementType(i), static_cast<const TUnboxedValuePod&>(value.GetElement(i)), valueBuilder); + } + return SetNodeType<ENodeType::List>(res.Release()); + } + + return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void()); +} + +TUnboxedValuePod MakeStruct(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); const auto size = structTypeInspector.GetMembersCount()) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + items.reserve(size); + + for (ui64 i = 0ULL; i < size; ++i) { + items.emplace_back( + valueBuilder->NewString(structTypeInspector.GetMemberName(i)), + MakeDom(typeHelper, structTypeInspector.GetMemberType(i), static_cast<const TUnboxedValuePod&>(value.GetElement(i)), valueBuilder) + ); + } + + return SetNodeType<ENodeType::Dict>(TUnboxedValuePod(new TMapNode(items.data(), items.size()))); + } + + return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void()); +} + +TUnboxedValuePod MakeVariant(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + const auto index = value.GetVariantIndex(); + const auto& item = value.GetVariantItem(); + const auto underlyingType = TVariantTypeInspector(*typeHelper, 
shape).GetUnderlyingType(); + switch (const auto kind = typeHelper->GetTypeKind(underlyingType)) { + case ETypeKind::Tuple: + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, underlyingType); index < tupleTypeInspector.GetElementsCount()) + return MakeDom(typeHelper, tupleTypeInspector.GetElementType(index), item, valueBuilder); + break; + case ETypeKind::Struct: + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, underlyingType); index < structTypeInspector.GetMembersCount()) + return MakeDom(typeHelper, structTypeInspector.GetMemberType(index), item, valueBuilder); + break; + default: + break; + } + Y_ABORT("Unsupported underlying type."); +} + +} + +TUnboxedValuePod MakeDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { + switch (const auto kind = typeHelper->GetTypeKind(shape)) { + case ETypeKind::Null: + return MakeEntity(); + case ETypeKind::EmptyList: + return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void()); + case ETypeKind::EmptyDict: + return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void()); + case ETypeKind::Data: + return MakeData(TDataTypeInspector(*typeHelper, shape).GetTypeId(), value, valueBuilder); + case ETypeKind::Optional: + return value ? 
MakeDom(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType(), value.GetOptionalValue(), valueBuilder) : MakeEntity(); + case ETypeKind::List: + return MakeList(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder); + case ETypeKind::Dict: { + const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); + const auto keyType = dictTypeInspector.GetKeyType(); + Y_ABORT_UNLESS(ETypeKind::Data == typeHelper->GetTypeKind(keyType), "Unsupported dict key type kind."); + const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); + Y_ABORT_UNLESS(keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id, "Unsupported dict key data type."); + return MakeDict(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder); + } + case ETypeKind::Tuple: + return MakeTuple(typeHelper, shape, value, valueBuilder); + case ETypeKind::Struct: + return MakeStruct(typeHelper, shape, value, valueBuilder); + case ETypeKind::Variant: + return MakeVariant(typeHelper, shape, value, valueBuilder); + case ETypeKind::Resource: + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + return value; + [[fallthrough]]; + default: + Y_ABORT("Unsupported data kind: %s", ToCString(kind)); + } +} + +} diff --git a/yql/essentials/minikql/dom/make.h b/yql/essentials/minikql/dom/make.h new file mode 100644 index 0000000000..48aab89474 --- /dev/null +++ b/yql/essentials/minikql/dom/make.h @@ -0,0 +1,10 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_types.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +namespace NYql::NDom { + +NUdf::TUnboxedValuePod MakeDom(const NUdf::ITypeInfoHelper* typeHelper, const NUdf::TType* shape, const NUdf::TUnboxedValuePod value, const NUdf::IValueBuilder* valueBuilder); + +} diff --git a/yql/essentials/minikql/dom/node.cpp b/yql/essentials/minikql/dom/node.cpp new file mode 100644 index 
0000000000..6eabde3e4e --- /dev/null +++ b/yql/essentials/minikql/dom/node.cpp @@ -0,0 +1,202 @@ +#include "node.h" + +#include <util/generic/algorithm.h> + +namespace NYql::NDom { + +namespace { + +inline bool StringLess(const TPair& x, const TPair& y) { + return x.first.AsStringRef() < y.first.AsStringRef(); +} + +inline bool StringRefLess(const TPair& x, const TStringRef& y) { + return x.first.AsStringRef() < y; +} + +inline bool StringEquals(const TPair& x, const TPair& y) { + return x.first.AsStringRef() == y.first.AsStringRef(); +} + +} + +template <bool NoSwap> +TMapNode::TIterator<NoSwap>::TIterator(const TMapNode* parent) + : Parent(const_cast<TMapNode*>(parent)) + , Index(-1) +{} + +template <bool NoSwap> +bool TMapNode::TIterator<NoSwap>::Skip() { + if (Index + 1 == Parent->UniqueCount_) { + return false; + } + + ++Index; + return true; +} + +template <bool NoSwap> +bool TMapNode::TIterator<NoSwap>::Next(TUnboxedValue& key) { + if (!Skip()) + return false; + if constexpr (NoSwap) { + key = Parent->Items_[Index].first; + } else { + key = Parent->Items_[Index].second; + } + return true; +} + +template <bool NoSwap> +bool TMapNode::TIterator<NoSwap>::NextPair(TUnboxedValue& key, TUnboxedValue& payload) { + if (!Next(key)) + return false; + if constexpr (NoSwap) { + payload = Parent->Items_[Index].second; + } else { + payload = Parent->Items_[Index].first; + } + return true; +} + +TMapNode::TMapNode(TMapNode&& src) + : Count_(src.Count_), UniqueCount_(src.UniqueCount_), Items_(src.Items_) +{ + src.Count_ = src.UniqueCount_ = 0U; + src.Items_ = nullptr; +} + +TMapNode::TMapNode(const TPair* items, ui32 count) + : Count_(count) + , Items_((TPair*)UdfAllocateWithSize(sizeof(TPair) * count)) +{ + std::memset(Items_, 0, sizeof(TPair) * count); + for (ui32 i = 0; i < count; ++i) { + Items_[i] = std::move(items[i]); + } + + StableSort(Items_, Items_ + count, StringLess); + UniqueCount_ = Unique(Items_, Items_ + count, StringEquals) - Items_; + for (ui32 i = 
UniqueCount_; i < count; ++i) { + Items_[i].first.Clear(); + Items_[i].second.Clear(); + } +} + +TMapNode::~TMapNode() { + for (ui32 i = 0; i < UniqueCount_; ++i) { + Items_[i].first.Clear(); + Items_[i].second.Clear(); + } + + UdfFreeWithSize(Items_, sizeof(TPair) * Count_); +} + +ui64 TMapNode::GetDictLength() const { + return UniqueCount_; +} + +TUnboxedValue TMapNode::GetDictIterator() const { + return TUnboxedValuePod(new TIterator<true>(this)); +} + +TUnboxedValue TMapNode::GetKeysIterator() const { + return TUnboxedValuePod(new TIterator<true>(this)); +} + +TUnboxedValue TMapNode::GetPayloadsIterator() const { + return TUnboxedValuePod(new TIterator<false>(this)); +} + +bool TMapNode::Contains(const TUnboxedValuePod& key) const { + return BinarySearch(Items_, Items_ + UniqueCount_, std::make_pair(key, TUnboxedValuePod()), StringLess); +} + +TUnboxedValue TMapNode::Lookup(const TUnboxedValuePod& key) const { + return Lookup(key.AsStringRef()); +} + +TUnboxedValue TMapNode::Lookup(const TStringRef& key) const { + const auto it = LowerBound(Items_, Items_ + UniqueCount_, key, StringRefLess); + if (it == Items_ + UniqueCount_ || static_cast<TStringBuf>(it->first.AsStringRef()) != static_cast<TStringBuf>(key)) + return {}; + + return it->second; +} + +bool TMapNode::HasDictItems() const { + return UniqueCount_ > 0ULL; +} + +bool TMapNode::IsSortedDict() const { + return true; +} + +void* TMapNode::GetResource() { + return Items_; +} + +TAttrNode::TAttrNode(const TUnboxedValue& map, TUnboxedValue&& value) + : TMapNode(std::move(*static_cast<TMapNode*>(map.AsBoxed().Get()))), Value_(std::move(value)) +{} + +TAttrNode::TAttrNode(TUnboxedValue&& value, const TPair* items, ui32 count) + : TMapNode(items, count), Value_(std::move(value)) +{} + +TUnboxedValue TAttrNode::GetVariantItem() const { + return Value_; +} + +TDebugPrinter::TDebugPrinter(const TUnboxedValuePod& node) + : Node(node) +{} + +IOutputStream& TDebugPrinter::Out(IOutputStream &o) const { + switch 
(GetNodeType(Node)) { + case ENodeType::Entity: + o << "entity (#)"; + break; + case ENodeType::Bool: + o << "boolean (" << (Node.Get<bool>() ? "true" : "false") << ") value"; + break; + case ENodeType::Int64: + o << "integer (" << Node.Get<i64>() << ") value"; + break; + case ENodeType::Uint64: + o << "unsigned integer (" << Node.Get<ui64>() << ") value"; + break; + case ENodeType::Double: + o << "floating point (" << Node.Get<double>() << ") value"; + break; + case ENodeType::String: + if (const std::string_view str(Node.AsStringRef()); str.empty()) + o << "empty string"; + else if(Node.IsEmbedded() && str.cend() == std::find_if(str.cbegin(), str.cend(), [](char c){ return !std::isprint(c); })) + o << "string '" << str << "' value"; + else + o << "string value of size " << str.size(); + break; + case ENodeType::List: + if (Node.IsBoxed()) + o << "list of size " << Node.GetListLength(); + else + o << "empty list"; + break; + case ENodeType::Dict: + if (Node.IsBoxed()) + o << "dict of size " << Node.GetDictLength(); + else + o << "empty dict"; + break; + case ENodeType::Attr: + return TDebugPrinter(Node.GetVariantItem()).Out(o); + default: + o << "invalid node"; + break; + } + return o; +} + +} diff --git a/yql/essentials/minikql/dom/node.h b/yql/essentials/minikql/dom/node.h new file mode 100644 index 0000000000..04a211fd09 --- /dev/null +++ b/yql/essentials/minikql/dom/node.h @@ -0,0 +1,167 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_value.h> + +namespace NYql::NDom { + +using namespace NUdf; + +constexpr char NodeResourceName[] = "Yson2.Node"; + +using TPair = std::pair<TUnboxedValue, TUnboxedValue>; + +enum class ENodeType : ui8 { + String = 0, + Bool = 1, + Int64 = 2, + Uint64 = 3, + Double = 4, + Entity = 5, + List = 6, + Dict = 7, + Attr = 8, +}; + +constexpr ui8 NodeTypeShift = 4; +constexpr ui8 NodeTypeMask = 0xf0; + +template<ENodeType type> +constexpr inline TUnboxedValuePod 
SetNodeType(TUnboxedValuePod node) { + const auto buffer = reinterpret_cast<ui8*>(&node); + buffer[TUnboxedValuePod::InternalBufferSize] = ui8(type) << NodeTypeShift; + return node; +} + +template<ENodeType type> +constexpr inline bool IsNodeType(const TUnboxedValuePod node) { + const auto buffer = reinterpret_cast<const ui8*>(&node); + const auto currentMask = buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask; + constexpr ui8 expectedMask = static_cast<ui8>(type) << NodeTypeShift; + return currentMask == expectedMask; +} + +inline ENodeType GetNodeType(const TUnboxedValuePod& node) { + const auto* buffer = reinterpret_cast<const char*>(&node); + const ui8 flag = (buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask) >> NodeTypeShift; + return static_cast<ENodeType>(flag); +} + +inline bool IsNodeType(const TUnboxedValuePod& node, ENodeType type) { + const auto* buffer = reinterpret_cast<const char*>(&node); + const ui8 currentMask = buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask; + const ui8 expectedMask = static_cast<ui8>(type) << NodeTypeShift; + return currentMask == expectedMask; +} + +class TMapNode : public TManagedBoxedValue { +public: + template <bool NoSwap> + class TIterator: public TManagedBoxedValue { + public: + TIterator(const TMapNode* parent); + + private: + bool Skip() final; + bool Next(TUnboxedValue& key) final; + bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final; + + const TRefCountedPtr<TMapNode> Parent; + ui32 Index; + }; + + TMapNode(const TPair* items, ui32 count); + + TMapNode(TMapNode&& src); + + ~TMapNode(); + + TUnboxedValue Lookup(const TStringRef& key) const; +private: + ui64 GetDictLength() const final; + + TUnboxedValue GetDictIterator() const final; + + TUnboxedValue GetKeysIterator() const final; + + TUnboxedValue GetPayloadsIterator() const final; + + bool Contains(const TUnboxedValuePod& key) const final; + + TUnboxedValue Lookup(const TUnboxedValuePod& key) const final; + + bool 
HasDictItems() const final; + + bool IsSortedDict() const final; + + void* GetResource() final; + + ui32 Count_; + ui32 UniqueCount_; + TPair * Items_; +}; + +class TAttrNode : public TMapNode { +public: + TAttrNode(const TUnboxedValue& map, NUdf::TUnboxedValue&& value); + + TAttrNode(NUdf::TUnboxedValue&& value, const TPair* items, ui32 count); + + NUdf::TUnboxedValue GetVariantItem() const final; + +private: + const NUdf::TUnboxedValue Value_; +}; + +inline TUnboxedValuePod MakeAttr(TUnboxedValue&& value, TPair* items, ui32 count) { + if (count == 0) { + return value.Release(); + } + + return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(value), items, count))); +} + +inline TUnboxedValuePod MakeString(const TStringBuf value, const IValueBuilder* valueBuilder) { + return valueBuilder->NewString(value).Release(); +} + +inline TUnboxedValuePod MakeBool(bool value) { + return SetNodeType<ENodeType::Bool>(TUnboxedValuePod(value)); +} + +inline TUnboxedValuePod MakeInt64(i64 value) { + return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(value)); +} + +inline TUnboxedValuePod MakeUint64(ui64 value) { + return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(value)); +} + +inline TUnboxedValuePod MakeDouble(double value) { + return SetNodeType<ENodeType::Double>(TUnboxedValuePod(value)); +} + +inline TUnboxedValuePod MakeEntity() { + return SetNodeType<ENodeType::Entity>(TUnboxedValuePod::Zero()); +} + +inline TUnboxedValuePod MakeList(TUnboxedValue* items, ui32 count, const IValueBuilder* valueBuilder) { + return SetNodeType<ENodeType::List>(count > 0U ? valueBuilder->NewList(items, count).Release() : TUnboxedValuePod::Zero()); +} + +inline TUnboxedValuePod MakeDict(const TPair* items, ui32 count) { + return SetNodeType<ENodeType::Dict>(count > 0U ? 
TUnboxedValuePod(new TMapNode(items, count)) : TUnboxedValuePod::Zero()); +} + +struct TDebugPrinter { + TDebugPrinter(const TUnboxedValuePod& node); + class IOutputStream& Out(class IOutputStream &o) const; + const TUnboxedValuePod& Node; +}; + +} + +template<> +inline void Out<NYql::NDom::TDebugPrinter>(class IOutputStream &o, const NYql::NDom::TDebugPrinter& p) { + p.Out(o); +} diff --git a/yql/essentials/minikql/dom/peel.cpp b/yql/essentials/minikql/dom/peel.cpp new file mode 100644 index 0000000000..7508bfe6f8 --- /dev/null +++ b/yql/essentials/minikql/dom/peel.cpp @@ -0,0 +1,373 @@ +#include "peel.h" +#include "node.h" +#include "yson.h" +#include "json.h" +#include "convert.h" + +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_type_printer.h> + +namespace NYql::NDom { +using namespace NUdf; + +namespace { + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelData(const TDataTypeId nodeType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (nodeType) { + case TDataType<char*>::Id: return ConvertToString<Strict, AutoConvert, false>(value, valueBuilder, pos); + case TDataType<TUtf8>::Id: return ConvertToString<Strict, AutoConvert, true>(value, valueBuilder, pos); + case TDataType<bool>::Id: return ConvertToBool<Strict, AutoConvert>(value, valueBuilder, pos); + case TDataType<i8>::Id: return ConvertToIntegral<Strict, AutoConvert, i8>(value, valueBuilder, pos); + case TDataType<i16>::Id: return ConvertToIntegral<Strict, AutoConvert, i16>(value, valueBuilder, pos); + case TDataType<i32>::Id: return ConvertToIntegral<Strict, AutoConvert, i32>(value, valueBuilder, pos); + case TDataType<i64>::Id: return ConvertToIntegral<Strict, AutoConvert, i64>(value, valueBuilder, pos); + case TDataType<ui8>::Id: return ConvertToIntegral<Strict, AutoConvert, ui8>(value, valueBuilder, pos); + case TDataType<ui16>::Id: return ConvertToIntegral<Strict, AutoConvert, 
ui16>(value, valueBuilder, pos); + case TDataType<ui32>::Id: return ConvertToIntegral<Strict, AutoConvert, ui32>(value, valueBuilder, pos); + case TDataType<ui64>::Id: return ConvertToIntegral<Strict, AutoConvert, ui64>(value, valueBuilder, pos); + case TDataType<float>::Id: return ConvertToFloat<Strict, AutoConvert, float>(value, valueBuilder, pos); + case TDataType<double>::Id: return ConvertToFloat<Strict, AutoConvert, double>(value, valueBuilder, pos); + case TDataType<TYson>::Id: return valueBuilder->NewString(SerializeYsonDomToBinary(value)).Release(); + case TDataType<TJson>::Id: return valueBuilder->NewString(SerializeJsonDom(value)).Release(); + default: break; + } + + UdfTerminate((::TStringBuilder() << "Unsupported data type: " << static_cast<int>(nodeType)).c_str()); +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod TryPeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos); + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelList(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::List: { + if (!x.IsBoxed()) + break; + if constexpr (Strict || AutoConvert) { + return TUnboxedValuePod(new TLazyConveter(x, std::bind(&PeelDom<Strict, AutoConvert>, typeHelper, itemType, std::placeholders::_1, valueBuilder, pos))); + } + TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator> values; + if (const auto elements = x.GetElements()) { + const auto size = x.GetListLength(); + values.reserve(size); + for (ui32 i = 0U; i < size; ++i) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, elements[i], valueBuilder, pos)) + values.emplace_back(item.GetOptionalValue()); + else if constexpr (Strict) + UdfTerminate("Error on convert list item."); + } + } else { + const auto it = 
x.GetListIterator(); + for (TUnboxedValue v; it.Next(v);) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, v, valueBuilder, pos)) + values.emplace_back(item.GetOptionalValue()); + else if constexpr (Strict) + UdfTerminate("Error on convert list item."); + } + } + if (values.empty()) { + break; + } + return valueBuilder->NewList(values.data(), values.size()).Release(); + } + case ENodeType::Attr: + return PeelList<Strict, AutoConvert>(typeHelper, itemType, x.GetVariantItem().Release(), valueBuilder, pos); + default: + if constexpr (AutoConvert) + break; + else if constexpr (Strict) + UdfTerminate("Cannot parse list from entity, scalar value or dict."); + else + return {}; + } + + return valueBuilder->NewEmptyList().Release(); +} + +template<bool Strict, bool AutoConvert, bool Utf8Keys> +TUnboxedValuePod PeelDict(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (GetNodeType(x)) { + case ENodeType::Dict: + if (!x.IsBoxed()) + break; + if constexpr (!Utf8Keys && (Strict || AutoConvert)) { + return TUnboxedValuePod(new TLazyConveter(x, std::bind(&PeelDom<Strict, AutoConvert>, typeHelper, itemType, std::placeholders::_1, valueBuilder, pos))); + } + if (const auto size = x.GetDictLength()) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs; + pairs.reserve(size); + const auto it = x.GetDictIterator(); + for (TUnboxedValue key, payload; it.NextPair(key, payload);) { + if (const auto k = ConvertToString<Strict, AutoConvert, Utf8Keys>(key.Release(), valueBuilder, pos)) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, payload, valueBuilder, pos)) { + pairs.emplace_back(std::move(k), item.GetOptionalValue()); + continue; + } + } + + if constexpr (Strict) + UdfTerminate("Error on convert dict payload."); + } + if (pairs.empty()) { + break; + } + return TUnboxedValuePod(new TMapNode(pairs.data(), 
pairs.size())); + } + break; + case ENodeType::Attr: + return PeelDict<Strict, AutoConvert, Utf8Keys>(typeHelper, itemType, x.GetVariantItem().Release(), valueBuilder, pos); + default: + if constexpr (AutoConvert) + break; + else if constexpr (Strict) + UdfTerminate("Cannot parse dict from entity, scalar value or list."); + else + return {}; + } + + return valueBuilder->NewEmptyList().Release(); +} + +TUnboxedValuePod MakeStub(const ITypeInfoHelper* typeHelper, const TType* shape, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (const auto kind = typeHelper->GetTypeKind(shape)) { + case ETypeKind::Optional: + return TUnboxedValuePod(); + case ETypeKind::Data: + switch (const auto nodeType = TDataTypeInspector(*typeHelper, shape).GetTypeId()) { + case TDataType<char*>::Id: + case TDataType<TUtf8>::Id: + case TDataType<bool>::Id: + case TDataType<i8>::Id: + case TDataType<i16>::Id: + case TDataType<i32>::Id: + case TDataType<i64>::Id: + case TDataType<ui8>::Id: + case TDataType<ui16>::Id: + case TDataType<ui32>::Id: + case TDataType<ui64>::Id: + case TDataType<float>::Id: + case TDataType<double>::Id: + case TDataType<TDecimal>::Id: + return TUnboxedValuePod::Zero(); + case TDataType<TYson>::Id: + return TUnboxedValuePod::Embedded("#"); + case TDataType<TJson>::Id: + return TUnboxedValuePod::Embedded("null"); + default: + UdfTerminate((::TStringBuilder() << "Unsupported data type: " << static_cast<int>(nodeType)).c_str()); + } + case ETypeKind::Tuple: + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(count, items); + items += count; + do *--items = MakeStub(typeHelper, tupleTypeInspector.GetElementType(--count), valueBuilder, pos); + while (count); + return result.Release(); + } + return valueBuilder->NewEmptyList().Release(); + case ETypeKind::Struct: + if (const auto structTypeInspector 
= TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(count, items); + items += count; + do *--items = MakeStub(typeHelper, structTypeInspector.GetMemberType(--count), valueBuilder, pos); + while (count); + return result.Release(); + } + return valueBuilder->NewEmptyList().Release(); + case ETypeKind::List: + case ETypeKind::Dict: + return valueBuilder->NewEmptyList().Release(); + case ETypeKind::Resource: + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + return MakeEntity(); + [[fallthrough]]; + default: + UdfTerminate((::TStringBuilder() << "Unsupported data kind: " << kind).c_str()); + } +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelTuple(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) { + switch (GetNodeType(x)) { + case ENodeType::List: { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(count, items); + ui32 i = 0U; + if (x.IsBoxed()) { + if (auto elements = x.GetElements()) { + for (auto size = x.GetListLength(); count && size--; --count) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, tupleTypeInspector.GetElementType(i++), *elements++, valueBuilder, pos)) + *items++ = item.GetOptionalValue(); + else if constexpr (Strict) + UdfTerminate("Error on convert tuple item."); + else + return {}; + } + } else if (const auto it = x.GetListIterator()) { + for (TUnboxedValue v; count && it.Next(v); --count) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, tupleTypeInspector.GetElementType(i++), v, valueBuilder, pos)) + *items++ = item.GetOptionalValue(); + else if 
constexpr (Strict) + UdfTerminate("Error on convert tuple item."); + else + return {}; + } + } + } + if (count) do + if constexpr (AutoConvert) + *items++ = MakeStub(typeHelper, tupleTypeInspector.GetElementType(i++), valueBuilder, pos); + else if (ETypeKind::Optional == typeHelper->GetTypeKind(tupleTypeInspector.GetElementType(i++))) + ++items; + else if constexpr (Strict) + UdfTerminate((::TStringBuilder() << "DOM list has less items then " << tupleTypeInspector.GetElementsCount() << " tuple elements.").c_str()); + else + return {}; + while (--count); + return result.Release(); + } + case ENodeType::Attr: + return PeelTuple<Strict, AutoConvert>(typeHelper, shape, x.GetVariantItem().Release(), valueBuilder, pos); + default: + if constexpr (AutoConvert) { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(count, items); + for (ui32 i = 0ULL; i < count; ++i) + if (ETypeKind::Optional != typeHelper->GetTypeKind(tupleTypeInspector.GetElementType(i))) + *items++ = MakeStub(typeHelper, tupleTypeInspector.GetElementType(i), valueBuilder, pos); + else + ++items; + return result.Release(); + } else if constexpr (Strict) + UdfTerminate("Cannot parse tuple from entity, scalar value or dict."); + else + break; + } + } + + return {}; +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelStruct(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape)) { + const auto size = structTypeInspector.GetMembersCount(); + switch (GetNodeType(x)) { + case ENodeType::Dict: { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(size, items); + for (ui32 i = 0ULL; i < size; ++i) { + if (x.IsBoxed()) { + if (const auto v = x.Lookup(valueBuilder->NewString(structTypeInspector.GetMemberName(i)))) { + if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, 
structTypeInspector.GetMemberType(i), v.GetOptionalValue(), valueBuilder, pos)) + *items++ = item.GetOptionalValue(); + else if constexpr (Strict) + UdfTerminate((::TStringBuilder() << "Error on convert struct member '" << structTypeInspector.GetMemberName(i) << "'.").c_str()); + else + return {}; + continue; + } + } + if constexpr (AutoConvert) + *items++ = MakeStub(typeHelper, structTypeInspector.GetMemberType(i), valueBuilder, pos); + else if (ETypeKind::Optional == typeHelper->GetTypeKind(structTypeInspector.GetMemberType(i))) + ++items; + else if constexpr (Strict) + UdfTerminate((::TStringBuilder() << "Missed struct member '" << structTypeInspector.GetMemberName(i) << "'.").c_str()); + else + return {}; + } + return result.Release(); + } + case ENodeType::Attr: + return PeelStruct<Strict, AutoConvert>(typeHelper, shape, x.GetVariantItem().Release(), valueBuilder, pos); + default: + if constexpr (AutoConvert) { + TUnboxedValue* items = nullptr; + auto result = valueBuilder->NewArray(size, items); + for (ui32 i = 0ULL; i < size; ++i) + if (ETypeKind::Optional != typeHelper->GetTypeKind(structTypeInspector.GetMemberType(i))) + *items++ = MakeStub(typeHelper, structTypeInspector.GetMemberType(i), valueBuilder, pos); + else + ++items; + return result.Release(); + } else if constexpr (Strict) + UdfTerminate("Cannot parse struct from entity, scalar value or list."); + else + break; + } + } + + return {}; +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelOptional(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + if (IsNodeType<ENodeType::Entity>(value)) + return TUnboxedValuePod().MakeOptional(); + + if (const auto result = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, value, valueBuilder, pos); AutoConvert || result) + return result; + else if constexpr (Strict) + UdfTerminate("Failed to convert Yson DOM."); + else + return 
TUnboxedValuePod().MakeOptional(); +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod TryPeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + switch (const auto kind = typeHelper->GetTypeKind(shape)) { + case ETypeKind::Data: + return PeelData<Strict, AutoConvert>(TDataTypeInspector(*typeHelper, shape).GetTypeId(), value, valueBuilder, pos); + case ETypeKind::Optional: + return PeelOptional<Strict, AutoConvert>(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder, pos); + case ETypeKind::List: + return PeelList<Strict, AutoConvert>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder, pos); + case ETypeKind::Dict: { + const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); + const auto keyType = dictTypeInspector.GetKeyType(); + if (const auto keyKind = typeHelper->GetTypeKind(keyType); ETypeKind::Data == keyKind) + switch (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId()) { + case TDataType<char*>::Id: return PeelDict<Strict, AutoConvert, false>(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder, pos); + case TDataType<TUtf8>::Id: return PeelDict<Strict, AutoConvert, true>(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder, pos); + default: UdfTerminate((::TStringBuilder() << "Unsupported dict key type: " << keyId).c_str()); + } + else + UdfTerminate((::TStringBuilder() << "Unsupported dict key kind: " << keyKind).c_str()); + } + case ETypeKind::Tuple: + return PeelTuple<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos); + case ETypeKind::Struct: + return PeelStruct<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos); + case ETypeKind::Resource: + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + return 
value; + [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME + default: + UdfTerminate((::TStringBuilder() << "Unsupported data kind: " << kind).c_str()); + } +} + +} + +template<bool Strict, bool AutoConvert> +TUnboxedValuePod PeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { + if (const auto result = TryPeelDom<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos)) + return result.GetOptionalValue(); + ::TStringBuilder sb; + sb << "Failed to convert Yson DOM into strict type: "; + TTypePrinter(*typeHelper, shape).Out(sb.Out); + UdfTerminate(sb.c_str()); +} + +template TUnboxedValuePod PeelDom<true, true>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos); +template TUnboxedValuePod PeelDom<false, true>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos); +template TUnboxedValuePod PeelDom<true, false>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos); +template TUnboxedValuePod PeelDom<false, false>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos); + +} diff --git a/yql/essentials/minikql/dom/peel.h b/yql/essentials/minikql/dom/peel.h new file mode 100644 index 0000000000..6e0dab1002 --- /dev/null +++ b/yql/essentials/minikql/dom/peel.h @@ -0,0 +1,11 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_types.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +namespace NYql::NDom { + +template<bool Strict, bool AutoConvert> +NUdf::TUnboxedValuePod PeelDom(const NUdf::ITypeInfoHelper* typeHelper, const NUdf::TType* shape, const 
NUdf::TUnboxedValuePod value, const NUdf::IValueBuilder* valueBuilder, const NUdf::TSourcePosition& pos); + +} diff --git a/yql/essentials/minikql/dom/ut/json_ut.cpp b/yql/essentials/minikql/dom/ut/json_ut.cpp new file mode 100644 index 0000000000..7184f3507f --- /dev/null +++ b/yql/essentials/minikql/dom/ut/json_ut.cpp @@ -0,0 +1,2028 @@ +#include <yql/essentials/minikql/dom/json.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <contrib/ydb/library/yql/minikql/mkql_alloc.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h> + +using namespace NYql; +using namespace NYql::NDom; +using namespace NKikimr; + +constexpr char json[] = +R"( +{ + "Fullname": [ + { + "freqs": { + "sum_qf@de": 28, + "sum_qf@en": 8, + "sum_qf@ru": 10060, + "sum_qf@tr": 91, + "sum_qf@uk": 245, + "sum_qf@uz": 6 + }, + "src": [ + { + "c": "ltr" + } + ], + "value": "Татьяна Сорокина" + } + ], + "Gender": [ + { + "src": [ + { + "c": "yam", + "is_guessed": "True" + }, + { + "c": "scm", + "is_guessed": "True" + }, + { + "c": "ltr", + "is_guessed": "True" + }, + { + "c": "lbr", + "is_guessed": "True" + } + ], + "value": "female" + } + ], + "Image": [ + { + "RelevLocale": [ + "universe" + ], + "avatar_type": "face", + "color_wiz": { + "back": "#DBC4B5", + "button": "#BFAC9E", + "button_text": "#23211E", + "text": "#705549" + }, + "faces_count": 1, + "langua": [ + "uk", + "by", + "kk", + "ru" + ], + "mds_avatar_id": "2001742/402534297", + "original_size": { + "height": 1478, + "width": 1478 + }, + "show_on_serp": true, + "src": [ + { + "url": "http://music.yandex.ru/artist/7945920", + "url_type": "page", + "value": "yam" + } + ], + "thumb": "Face", + "type": "image", + "url": "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig", + "value": "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig" + } + ], + "ImageSearchRequest": [ + { 
+ "RelevLocale": [ + "ru", + "by" + ], + "value": "Сорокина Татьяна фото" + } + ], + "Key": [ + { + "langua": [ + "ru" + ], + "predict": "972", + "rank": 0, + "src": [ + { + "c": "rut" + } + ], + "value": "sorokina tatyana" + }, + { + "freqs": { + "sum_qf@de": 3, + "sum_qf@en": 2, + "sum_qf@ru": 11504, + "sum_qf@tr": 35, + "sum_qf@uk": 145, + "sum_qf@uz": 1 + }, + "langua": [ + "ru" + ], + "src": [ + { + "c": "yam" + }, + { + "c": "ltr" + }, + { + "c": "lbr" + } + ], + "value": "сорокина татьяна" + }, + { + "langua": [ + "ru" + ], + "predict": "931", + "rank": 1, + "src": [ + { + "c": "rut" + } + ], + "value": "tatiana sorokina" + }, + { + "langua": [ + "ru" + ], + "predict": "951", + "rank": 0, + "src": [ + { + "c": "rut" + } + ], + "value": "tatyana sorokina" + }, + { + "freqs": { + "SenseRatio": 0.01, + "SenseRatio@de": 0.5, + "SenseRatio@en": 0.5, + "SenseRatio@et": 0.5, + "SenseRatio@fi": 0.5, + "SenseRatio@id": 0.5, + "SenseRatio@kk": 0.5, + "SenseRatio@lt": 0.5, + "SenseRatio@lv": 0.5, + "SenseRatio@pl": 0.5, + "SenseRatio@ru": 0, + "SenseRatio@tr": 0.5, + "SenseRatio@uk": 0.5, + "SenseRatio@uz": 0.5, + "sum_qf@de": 28, + "sum_qf@en": 8, + "sum_qf@ru": 10060, + "sum_qf@tr": 91, + "sum_qf@uk": 245, + "sum_qf@uz": 6 + }, + "langua": [ + "ru" + ], + "src": [ + { + "c": "scm", + "name": "bookmate.com" + }, + { + "c": "yam" + }, + { + "c": "ltr" + } + ], + "value": "татьяна сорокина" + } + ], + "Projects": [ + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845750|Наши дети]]" + } + ], + "freqs": { + "sum_qf": 256643 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845750|Наши дети]]" + }, + { + "Role": [ + "MAIN_ARTIST" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam08459289|Сто дорог, одна – моя]]" + } + ], + 
"freqs": { + "sum_qf": 54092 + }, + "hint_description": [ + { + "RelevLocale": [ + "ru", + "ua", + "tr", + "by" + ], + "value": "2019" + } + ], + "otype": "Music/Album@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam08459289|Сто дорог, одна – моя]]" + }, + { + "Role": "Author@on", + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm540668de..0|История медицины]]" + } + ], + "freqs": { + "sum_qf": 49611 + }, + "report": "False", + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm540668de..0|История медицины]]" + }, + { + "Role": "Author@on", + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm-3f1fcad4..0|Мыколка]]" + } + ], + "report": "False", + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm-3f1fcad4..0|Мыколка]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845751|100 дорог]]" + } + ], + "freqs": { + "sum_qf": 21522 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845751|100 дорог]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#ltr08920335|Система дистрибуции. 
Инструменты создания конкурентного преимущества]]" + } + ], + "hint_description": [ + { + "RelevLocale": [ + "ru", + "ua", + "tr", + "by" + ], + "value": "2015" + } + ], + "report": "False", + "src": [ + { + "c": "ltr" + }, + { + "c": "lbr" + } + ], + "value": "[[#ltr08920335]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrbs464788|Пейп-арт]]" + } + ], + "freqs": { + "sum_qf": 12676 + }, + "report": "False", + "src": [ + { + "c": "lbr" + } + ], + "value": "[[#lbrbs464788]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrbs137089|Филиальная сеть: развитие и управление]]" + } + ], + "freqs": { + "sum_qf": 21 + }, + "report": "False", + "src": [ + { + "c": "lbr" + } + ], + "value": "[[#lbrbs137089]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ], + "report": "False", + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb467470|Система дистрибуции. 
Инструменты создания конкурентного преимущества]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb464788|Пейп-арт]]" + } + ], + "freqs": { + "sum_qf": 12676 + }, + "report": "False", + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb464788|Пейп-арт]]" + }, + { + "Role": [ + "Artist@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb274279|Что сначала,что потом?]]" + } + ], + "freqs": { + "sum_qf": 15 + }, + "report": "False", + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb274279|Что сначала,что потом?]]" + }, + { + "Role": [ + "Author@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + } + ], + "freqs": { + "sum_qf": 21 + }, + "report": "False", + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845752|Храни его]]" + } + ], + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845752|Храни его]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845753|Удача]]" + } + ], + "freqs": { + "sum_qf": 1431963 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845753|Удача]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845754|Матушка Россия]]" + 
} + ], + "freqs": { + "sum_qf": 34699 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845754|Матушка Россия]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845755|Нежданное свидание]]" + } + ], + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845755|Нежданное свидание]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845756|Я – мама]]" + } + ], + "freqs": { + "sum_qf": 441 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845756|Я – мама]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845757|Глупый сон]]" + } + ], + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845757|Глупый сон]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845760|Спасибо вам]]" + } + ], + "freqs": { + "sum_qf": 152646 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845760|Спасибо вам]]" + }, + { + "Role": [ + "Performer@on" + ], + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845758|С Днём рождения]]" + } + ], + "freqs": { + "sum_qf": 16331217 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845758|С Днём рождения]]" + }, + { + "Role": [ + "Performer@on" + ], + 
"carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845759|Песенка о мечтах]]" + } + ], + "freqs": { + "sum_qf": 94 + }, + "otype": "Music/Recording@on", + "report": "False", + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845759|Песенка о мечтах]]" + }, + { + "Role": "Author@on", + "carousel": "False", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ], + "report": "False", + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ], + "SearchRequest": [ + { + "RelevLocale": [ + "ru", + "by" + ], + "value": "Сорокина Татьяна" + } + ], + "Title": [ + { + "freqs": { + "sum_qf@de": 3, + "sum_qf@en": 2, + "sum_qf@ru": 11504, + "sum_qf@tr": 35, + "sum_qf@uk": 145, + "sum_qf@uz": 1 + }, + "langua": [ + "ru" + ], + "src": [ + { + "c": "lbr" + } + ], + "value": "Сорокина Татьяна" + }, + { + "RelevLocale": [ + "kz", + "ua", + "by", + "ru" + ], + "freqs": { + "sum_qf@de": 28, + "sum_qf@en": 8, + "sum_qf@ru": 10060, + "sum_qf@tr": 91, + "sum_qf@uk": 245, + "sum_qf@uz": 6 + }, + "langua": [ + "ru" + ], + "src": [ + { + "c": "scm", + "name": "bookmate.com" + }, + { + "c": "yam" + }, + { + "c": "ltr" + } + ], + "value": "Татьяна Сорокина" + } + ], + "TopTracks": [ + { + "Position": 9, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845757|Глупый сон]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845757|Глупый сон]]" + }, + { + "Position": 8, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845758|С Днём 
рождения]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845758|С Днём рождения]]" + }, + { + "Position": 7, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845759|Песенка о мечтах]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845759|Песенка о мечтах]]" + }, + { + "Position": 6, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845752|Храни его]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845752|Храни его]]" + }, + { + "Position": 5, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845753|Удача]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845753|Удача]]" + }, + { + "Position": 4, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845754|Матушка Россия]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845754|Матушка Россия]]" + }, + { + "Position": 3, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845755|Нежданное свидание]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845755|Нежданное свидание]]" + }, + { + "Position": 2, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + 
"ua", + "by", + "kz" + ], + "value": "[[#yam356845750|Наши дети]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845750|Наши дети]]" + }, + { + "Position": 1, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845751|100 дорог]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845751|100 дорог]]" + }, + { + "Position": 0, + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845760|Спасибо вам]]" + } + ], + "langua": [ + "uk", + "ru", + "kk", + "by" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845760|Спасибо вам]]" + } + ], + "freqs": { + "average_proper_ratio": [ + "1.00" + ], + "proper_ratio": [ + { + "src": [ + { + "c": "yam" + } + ], + "value": "1.00" + } + ], + "sum_qf@de": [ + "31" + ], + "sum_qf@en": [ + "10" + ], + "sum_qf@ru": [ + "21572" + ], + "sum_qf@tr": [ + "126" + ], + "sum_qf@uk": [ + "390" + ], + "sum_qf@uz": [ + "7" + ] + }, + "fullname": [ + { + "freqs": { + "sum_qf@de": 28, + "sum_qf@en": 8, + "sum_qf@ru": 10060, + "sum_qf@tr": 91, + "sum_qf@uk": 245, + "sum_qf@uz": 6 + }, + "rfr": [ + "[[#rfr21731b2]]" + ], + "src": [ + { + "c": "ltr" + } + ], + "value": "Татьяна Сорокина" + } + ], + "human_gender": [ + { + "rfr": [ + "[[#rfr21f0d779]]" + ], + "src": [ + { + "c": "yam", + "is_guessed": "True" + }, + { + "c": "scm", + "is_guessed": "True" + }, + { + "c": "ltr", + "is_guessed": "True" + }, + { + "c": "lbr", + "is_guessed": "True" + } + ], + "value": "female" + } + ], + "ids": [ + { + "src": [ + { + "c": "yam" + } + ], + "value": "http://music.yandex.ru/artist/7945920" + }, + { + "src": [ + { + "c": "ltr" + } + ], + "value": "https://www.litres.ru/4815845" + }, + { + "src": [ + { + "c": "lbr" + } 
+ ], + "value": "http://www.labirint.ru/authors/43298" + } + ], + "isa": { + "Wtype": "Hum", + "otype": [ + { + "src": [ + { + "c": "yam" + }, + { + "c": "scm" + }, + { + "c": "ltr" + }, + { + "c": "lbr" + } + ], + "value": "Hum" + } + ] + }, + "merged_ontoids": [ + "ltr24815845", + "scmbookmatecomh7b363cfd07a49aed419fde3dbd010f64", + "lbrh43298", + "yam17945920" + ], + "musical_artist_groups": [ + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845750|Наши дети]]" + } + ], + "freqs": { + "sum_qf": 256643 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845750|Наши дети]]" + }, + { + "Role": [ + "MAIN_ARTIST" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam08459289|Сто дорог, одна – моя]]" + } + ], + "freqs": { + "sum_qf": 54092 + }, + "hint_description": [ + { + "RelevLocale": [ + "ru", + "ua", + "tr", + "by" + ], + "value": "2019" + } + ], + "otype": "Music/Album@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam08459289|Сто дорог, одна – моя]]" + }, + { + "Role": "Author@on", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm540668de..0|История медицины]]" + } + ], + "freqs": { + "sum_qf": 49611 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm540668de..0|История медицины]]" + }, + { + "Role": "Author@on", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm-3f1fcad4..0|Мыколка]]" + } + ], + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm-3f1fcad4..0|Мыколка]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845751|100 дорог]]" + } + ], 
+ "freqs": { + "sum_qf": 21522 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845751|100 дорог]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ], + "hint_description": [ + { + "RelevLocale": [ + "ru", + "ua", + "tr", + "by" + ], + "value": "2015" + } + ], + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "ltr" + }, + { + "c": "lbr" + } + ], + "value": "[[#ltr08920335]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrbs464788|Пейп-арт]]" + } + ], + "freqs": { + "sum_qf": 12676 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr" + } + ], + "value": "[[#lbrbs464788]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrbs137089|Филиальная сеть: развитие и управление]]" + } + ], + "freqs": { + "sum_qf": 21 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr" + } + ], + "value": "[[#lbrbs137089]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ], + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb467470|Система дистрибуции. 
Инструменты создания конкурентного преимущества]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb464788|Пейп-арт]]" + } + ], + "freqs": { + "sum_qf": 12676 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb464788|Пейп-арт]]" + }, + { + "Role": [ + "Artist@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb274279|Что сначала,что потом?]]" + } + ], + "freqs": { + "sum_qf": 15 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb274279|Что сначала,что потом?]]" + }, + { + "Role": [ + "Author@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + } + ], + "freqs": { + "sum_qf": 21 + }, + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "lbr", + "f": "book" + } + ], + "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845752|Храни его]]" + } + ], + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845752|Храни его]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845753|Удача]]" + } + ], + "freqs": { + "sum_qf": 1431963 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845753|Удача]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845754|Матушка Россия]]" + } + ], + "freqs": { + "sum_qf": 34699 + }, + "otype": 
"Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845754|Матушка Россия]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845755|Нежданное свидание]]" + } + ], + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845755|Нежданное свидание]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845756|Я – мама]]" + } + ], + "freqs": { + "sum_qf": 441 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845756|Я – мама]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845757|Глупый сон]]" + } + ], + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845757|Глупый сон]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845760|Спасибо вам]]" + } + ], + "freqs": { + "sum_qf": 152646 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845760|Спасибо вам]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#yam356845758|С Днём рождения]]" + } + ], + "freqs": { + "sum_qf": 16331217 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845758|С Днём рождения]]" + }, + { + "Role": [ + "Performer@on" + ], + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + 
"value": "[[#yam356845759|Песенка о мечтах]]" + } + ], + "freqs": { + "sum_qf": 94 + }, + "otype": "Music/Recording@on", + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "yam" + } + ], + "value": "[[#yam356845759|Песенка о мечтах]]" + }, + { + "Role": "Author@on", + "formatted": [ + { + "RelevLocale": [ + "ru", + "ua", + "by", + "kz" + ], + "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ], + "rfr": [ + "[[#rfr110390d1]]" + ], + "src": [ + { + "c": "scm" + } + ], + "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ] +} +)"; + +constexpr auto Steps = 10000U; + +Y_UNIT_TEST_SUITE(TJsonTests) { + Y_UNIT_TEST(TestValidate) { + UNIT_ASSERT(IsValidJson(json)); + + UNIT_ASSERT(!IsValidJson("[123}")); + UNIT_ASSERT(!IsValidJson("[123],[456]")); + UNIT_ASSERT(!IsValidJson(R"({"c" : "scm"])")); + UNIT_ASSERT(!IsValidJson("")); + UNIT_ASSERT(!IsValidJson(R"({"c",})")); + UNIT_ASSERT(!IsValidJson(R"({null : "scm"})")); + UNIT_ASSERT(!IsValidJson(R"({'one': 1})")); + } + + Y_UNIT_TEST(TestPerfValidate) { + const auto t = TInstant::Now(); + for (auto i = 0U; i < Steps; ++i) { + UNIT_ASSERT(IsValidJson(json)); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfParse) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for (auto& i : v) { + UNIT_ASSERT(i = TryParseJsonDom(json, &builder)); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfSerialize) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory 
holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + const auto dom = TryParseJsonDom(json, &builder); + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for (auto& i : v) { + UNIT_ASSERT(i = builder.NewString(SerializeJsonDom(dom))); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } +} diff --git a/yql/essentials/minikql/dom/ut/ya.make b/yql/essentials/minikql/dom/ut/ya.make new file mode 100644 index 0000000000..da77c16342 --- /dev/null +++ b/yql/essentials/minikql/dom/ut/ya.make @@ -0,0 +1,20 @@ +IF (NOT WINDOWS) + UNITTEST_FOR(yql/essentials/minikql/dom) + + SRCS( + yson_ut.cpp + json_ut.cpp + ) + + SIZE(MEDIUM) + + PEERDIR( + contrib/ydb/library/yql/minikql/computation/llvm14 + yql/essentials/public/udf/service/exception_policy + contrib/ydb/library/yql/sql/pg_dummy + ) + + YQL_LAST_ABI_VERSION() + + END() +ENDIF() diff --git a/yql/essentials/minikql/dom/ut/yson_ut.cpp b/yql/essentials/minikql/dom/ut/yson_ut.cpp new file mode 100644 index 0000000000..a91fb70bf2 --- /dev/null +++ b/yql/essentials/minikql/dom/ut/yson_ut.cpp @@ -0,0 +1,2087 @@ +#include <yql/essentials/minikql/dom/yson.h> + +#include <yql/essentials/minikql/dom/json.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <contrib/ydb/library/yql/minikql/mkql_alloc.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h> + +using namespace NYql; +using namespace NYql::NDom; +using namespace NKikimr; + +constexpr char yson[] = +R"( +{ + "Fullname" = [ + { + "freqs" = { + "sum_qf@de" = 28; + "sum_qf@en" = 8; + "sum_qf@ru" = 10060; + "sum_qf@tr" = 91; + "sum_qf@uk" = 245; + "sum_qf@uz" = 6 + }; + "src" = [ + { + "c" = "ltr" + } + ]; + "value" = "Татьяна Сорокина" + } + ]; + "Gender" = [ + { + "src" = [ + { + "c" = "yam"; + "is_guessed" = "True" + }; + { + "c" = 
"scm"; + "is_guessed" = "True" + }; + { + "c" = "ltr"; + "is_guessed" = "True" + }; + { + "c" = "lbr"; + "is_guessed" = "True" + } + ]; + "value" = "female" + } + ]; + "Image" = [ + { + "RelevLocale" = [ + "universe" + ]; + "avatar_type" = "face"; + "color_wiz" = { + "back" = "#DBC4B5"; + "button" = "#BFAC9E"; + "button_text" = "#23211E"; + "text" = "#705549" + }; + "faces_count" = 1; + "langua" = [ + "uk"; + "by"; + "kk"; + "ru" + ]; + "mds_avatar_id" = "2001742/402534297"; + "original_size" = { + "height" = 1478; + "width" = 1478 + }; + "show_on_serp" = %true; + "src" = [ + { + "url" = "http://music.yandex.ru/artist/7945920"; + "url_type" = "page"; + "value" = "yam" + } + ]; + "thumb" = "Face"; + "type" = "image"; + "url" = "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig"; + "value" = "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig" + } + ]; + "ImageSearchRequest" = [ + { + "RelevLocale" = [ + "ru"; + "by" + ]; + "value" = "Сорокина Татьяна фото" + } + ]; + "Key" = [ + { + "langua" = [ + "ru" + ]; + "predict" = "972"; + "rank" = 0; + "src" = [ + { + "c" = "rut" + } + ]; + "value" = "sorokina tatyana" + }; + { + "freqs" = { + "sum_qf@de" = 3; + "sum_qf@en" = 2; + "sum_qf@ru" = 11504; + "sum_qf@tr" = 35; + "sum_qf@uk" = 145; + "sum_qf@uz" = 1 + }; + "langua" = [ + "ru" + ]; + "src" = [ + { + "c" = "yam" + }; + { + "c" = "ltr" + }; + { + "c" = "lbr" + } + ]; + "value" = "сорокина татьяна" + }; + { + "langua" = [ + "ru" + ]; + "predict" = "931"; + "rank" = 1; + "src" = [ + { + "c" = "rut" + } + ]; + "value" = "tatiana sorokina" + }; + { + "langua" = [ + "ru" + ]; + "predict" = "951"; + "rank" = 0; + "src" = [ + { + "c" = "rut" + } + ]; + "value" = "tatyana sorokina" + }; + { + "freqs" = { + "SenseRatio" = 0.01; + "SenseRatio@de" = 0.5; + "SenseRatio@en" = 0.5; + "SenseRatio@et" = 0.5; + "SenseRatio@fi" = 0.5; + "SenseRatio@id" = 0.5; + "SenseRatio@kk" = 0.5; + "SenseRatio@lt" = 0.5; + "SenseRatio@lv" = 0.5; + 
"SenseRatio@pl" = 0.5; + "SenseRatio@ru" = 0; + "SenseRatio@tr" = 0.5; + "SenseRatio@uk" = 0.5; + "SenseRatio@uz" = 0.5; + "sum_qf@de" = 28; + "sum_qf@en" = 8; + "sum_qf@ru" = 10060; + "sum_qf@tr" = 91; + "sum_qf@uk" = 245; + "sum_qf@uz" = 6 + }; + "langua" = [ + "ru" + ]; + "src" = [ + { + "c" = "scm"; + "name" = "bookmate.com" + }; + { + "c" = "yam" + }; + { + "c" = "ltr" + } + ]; + "value" = "татьяна сорокина" + } + ]; + "Projects" = [ + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845750|Наши дети]]" + } + ]; + "freqs" = { + "sum_qf" = 256643 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845750|Наши дети]]" + }; + { + "Role" = [ + "MAIN_ARTIST" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam08459289|Сто дорог, одна – моя]]" + } + ]; + "freqs" = { + "sum_qf" = 54092 + }; + "hint_description" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "tr"; + "by" + ]; + "value" = "2019" + } + ]; + "otype" = "Music/Album@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam08459289|Сто дорог, одна – моя]]" + }; + { + "Role" = "Author@on"; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#scm540668de..0|История медицины]]" + } + ]; + "freqs" = { + "sum_qf" = 49611 + }; + "report" = "False"; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm540668de..0|История медицины]]" + }; + { + "Role" = "Author@on"; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#scm-3f1fcad4..0|Мыколка]]" + } + ]; + "report" = "False"; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm-3f1fcad4..0|Мыколка]]" + }; + { + "Role" = [ + "Performer@on" + ]; + 
"carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845751|100 дорог]]" + } + ]; + "freqs" = { + "sum_qf" = 21522 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845751|100 дорог]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ]; + "hint_description" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "tr"; + "by" + ]; + "value" = "2015" + } + ]; + "report" = "False"; + "src" = [ + { + "c" = "ltr" + }; + { + "c" = "lbr" + } + ]; + "value" = "[[#ltr08920335]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrbs464788|Пейп-арт]]" + } + ]; + "freqs" = { + "sum_qf" = 12676 + }; + "report" = "False"; + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "[[#lbrbs464788]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrbs137089|Филиальная сеть: развитие и управление]]" + } + ]; + "freqs" = { + "sum_qf" = 21 + }; + "report" = "False"; + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "[[#lbrbs137089]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ]; + "report" = "False"; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb467470|Система дистрибуции. 
Инструменты создания конкурентного преимущества]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb464788|Пейп-арт]]" + } + ]; + "freqs" = { + "sum_qf" = 12676 + }; + "report" = "False"; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb464788|Пейп-арт]]" + }; + { + "Role" = [ + "Artist@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb274279|Что сначала,что потом?]]" + } + ]; + "freqs" = { + "sum_qf" = 15 + }; + "report" = "False"; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb274279|Что сначала,что потом?]]" + }; + { + "Role" = [ + "Author@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + } + ]; + "freqs" = { + "sum_qf" = 21 + }; + "report" = "False"; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845752|Храни его]]" + } + ]; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845752|Храни его]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845753|Удача]]" + } + ]; + "freqs" = { + "sum_qf" = 1431963 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845753|Удача]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + 
"by"; + "kz" + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + } + ]; + "freqs" = { + "sum_qf" = 34699 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + } + ]; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845756|Я – мама]]" + } + ]; + "freqs" = { + "sum_qf" = 441 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845756|Я – мама]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845757|Глупый сон]]" + } + ]; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845757|Глупый сон]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + } + ]; + "freqs" = { + "sum_qf" = 152646 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + } + ]; + "freqs" = { + "sum_qf" = 16331217 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ 
+ { + "c" = "yam" + } + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + } + ]; + "freqs" = { + "sum_qf" = 94 + }; + "otype" = "Music/Recording@on"; + "report" = "False"; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + }; + { + "Role" = "Author@on"; + "carousel" = "False"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ]; + "report" = "False"; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ]; + "SearchRequest" = [ + { + "RelevLocale" = [ + "ru"; + "by" + ]; + "value" = "Сорокина Татьяна" + } + ]; + "Title" = [ + { + "freqs" = { + "sum_qf@de" = 3; + "sum_qf@en" = 2; + "sum_qf@ru" = 11504; + "sum_qf@tr" = 35; + "sum_qf@uk" = 145; + "sum_qf@uz" = 1 + }; + "langua" = [ + "ru" + ]; + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "Сорокина Татьяна" + }; + { + "RelevLocale" = [ + "kz"; + "ua"; + "by"; + "ru" + ]; + "freqs" = { + "sum_qf@de" = 28; + "sum_qf@en" = 8; + "sum_qf@ru" = 10060; + "sum_qf@tr" = 91; + "sum_qf@uk" = 245; + "sum_qf@uz" = 6 + }; + "langua" = [ + "ru" + ]; + "src" = [ + { + "c" = "scm"; + "name" = "bookmate.com" + }; + { + "c" = "yam" + }; + { + "c" = "ltr" + } + ]; + "value" = "Татьяна Сорокина" + } + ]; + "TopTracks" = [ + { + "Position" = 9; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845757|Глупый сон]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845757|Глупый сон]]" + }; + { + 
"Position" = 8; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + }; + { + "Position" = 7; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + }; + { + "Position" = 6; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845752|Храни его]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845752|Храни его]]" + }; + { + "Position" = 5; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845753|Удача]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845753|Удача]]" + }; + { + "Position" = 4; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + }; + { + "Position" = 3; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + 
"by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + }; + { + "Position" = 2; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845750|Наши дети]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845750|Наши дети]]" + }; + { + "Position" = 1; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845751|100 дорог]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845751|100 дорог]]" + }; + { + "Position" = 0; + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + } + ]; + "langua" = [ + "uk"; + "ru"; + "kk"; + "by" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + } + ]; + "freqs" = { + "average_proper_ratio" = [ + "1.00" + ]; + "proper_ratio" = [ + { + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "1.00" + } + ]; + "sum_qf@de" = [ + "31" + ]; + "sum_qf@en" = [ + "10" + ]; + "sum_qf@ru" = [ + "21572" + ]; + "sum_qf@tr" = [ + "126" + ]; + "sum_qf@uk" = [ + "390" + ]; + "sum_qf@uz" = [ + "7" + ] + }; + "fullname" = [ + { + "freqs" = { + "sum_qf@de" = 28; + "sum_qf@en" = 8; + "sum_qf@ru" = 10060; + "sum_qf@tr" = 91; + "sum_qf@uk" = 245; + "sum_qf@uz" = 6 + }; + "rfr" = [ + "[[#rfr21731b2]]" + ]; + "src" = [ + { + "c" = "ltr" + } + ]; + "value" = "Татьяна Сорокина" + } + ]; + "human_gender" = [ + { + "rfr" = [ + "[[#rfr21f0d779]]" + ]; + "src" = [ + { + "c" = "yam"; + "is_guessed" = "True" + }; + { + "c" = "scm"; + "is_guessed" = "True" + }; + { + "c" = "ltr"; + "is_guessed" = "True" + }; + { + "c" = "lbr"; + 
"is_guessed" = "True" + } + ]; + "value" = "female" + } + ]; + "ids" = [ + { + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "http://music.yandex.ru/artist/7945920" + }; + { + "src" = [ + { + "c" = "ltr" + } + ]; + "value" = "https://www.litres.ru/4815845" + }; + { + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "http://www.labirint.ru/authors/43298" + } + ]; + "isa" = { + "Wtype" = "Hum"; + "otype" = [ + { + "src" = [ + { + "c" = "yam" + }; + { + "c" = "scm" + }; + { + "c" = "ltr" + }; + { + "c" = "lbr" + } + ]; + "value" = "Hum" + } + ] + }; + "merged_ontoids" = [ + "ltr24815845"; + "scmbookmatecomh7b363cfd07a49aed419fde3dbd010f64"; + "lbrh43298"; + "yam17945920" + ]; + "musical_artist_groups" = [ + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845750|Наши дети]]" + } + ]; + "freqs" = { + "sum_qf" = 256643 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845750|Наши дети]]" + }; + { + "Role" = [ + "MAIN_ARTIST" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam08459289|Сто дорог, одна – моя]]" + } + ]; + "freqs" = { + "sum_qf" = 54092 + }; + "hint_description" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "tr"; + "by" + ]; + "value" = "2019" + } + ]; + "otype" = "Music/Album@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam08459289|Сто дорог, одна – моя]]" + }; + { + "Role" = "Author@on"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#scm540668de..0|История медицины]]" + } + ]; + "freqs" = { + "sum_qf" = 49611 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm540668de..0|История медицины]]" + }; + { + "Role" = "Author@on"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + 
"by"; + "kz" + ]; + "value" = "[[#scm-3f1fcad4..0|Мыколка]]" + } + ]; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm-3f1fcad4..0|Мыколка]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845751|100 дорог]]" + } + ]; + "freqs" = { + "sum_qf" = 21522 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845751|100 дорог]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + } + ]; + "hint_description" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "tr"; + "by" + ]; + "value" = "2015" + } + ]; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "ltr" + }; + { + "c" = "lbr" + } + ]; + "value" = "[[#ltr08920335]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrbs464788|Пейп-арт]]" + } + ]; + "freqs" = { + "sum_qf" = 12676 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "[[#lbrbs464788]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrbs137089|Филиальная сеть: развитие и управление]]" + } + ]; + "freqs" = { + "sum_qf" = 21 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr" + } + ]; + "value" = "[[#lbrbs137089]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb467470|Система дистрибуции. 
Инструменты создания конкурентного преимущества]]" + } + ]; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb464788|Пейп-арт]]" + } + ]; + "freqs" = { + "sum_qf" = 12676 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb464788|Пейп-арт]]" + }; + { + "Role" = [ + "Artist@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb274279|Что сначала,что потом?]]" + } + ]; + "freqs" = { + "sum_qf" = 15 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb274279|Что сначала,что потом?]]" + }; + { + "Role" = [ + "Author@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + } + ]; + "freqs" = { + "sum_qf" = 21 + }; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "lbr"; + "f" = "book" + } + ]; + "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845752|Храни его]]" + } + ]; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845752|Храни его]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845753|Удача]]" + } + ]; + "freqs" = { + "sum_qf" = 1431963 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + 
"value" = "[[#yam356845753|Удача]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + } + ]; + "freqs" = { + "sum_qf" = 34699 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845754|Матушка Россия]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + } + ]; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845755|Нежданное свидание]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845756|Я – мама]]" + } + ]; + "freqs" = { + "sum_qf" = 441 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845756|Я – мама]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845757|Глупый сон]]" + } + ]; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845757|Глупый сон]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + } + ]; + "freqs" = { + "sum_qf" = 152646 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845760|Спасибо вам]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + } + ]; + "freqs" 
= { + "sum_qf" = 16331217 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845758|С Днём рождения]]" + }; + { + "Role" = [ + "Performer@on" + ]; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + } + ]; + "freqs" = { + "sum_qf" = 94 + }; + "otype" = "Music/Recording@on"; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "yam" + } + ]; + "value" = "[[#yam356845759|Песенка о мечтах]]" + }; + { + "Role" = "Author@on"; + "formatted" = [ + { + "RelevLocale" = [ + "ru"; + "ua"; + "by"; + "kz" + ]; + "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ]; + "rfr" = [ + "[[#rfr110390d1]]" + ]; + "src" = [ + { + "c" = "scm" + } + ]; + "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]" + } + ] +} +)"; + +constexpr auto Steps = 10000U; + +Y_UNIT_TEST_SUITE(TYsonTests) { + Y_UNIT_TEST(TestValidate) { + UNIT_ASSERT(IsValidYson(yson)); + + UNIT_ASSERT(!IsValidYson("[123}")); + UNIT_ASSERT(!IsValidYson("[123];[456]")); + UNIT_ASSERT(!IsValidYson(R"({"c" = "scm"])")); + UNIT_ASSERT(!IsValidYson("")); + UNIT_ASSERT(!IsValidYson(R"({"c";})")); + UNIT_ASSERT(!IsValidYson(R"({# = "scm"})")); + UNIT_ASSERT(!IsValidYson(R"({'one'= 1})")); + } + + Y_UNIT_TEST(TestPerfValidate) { + const auto t = TInstant::Now(); + for (auto i = 0U; i < Steps; ++i) { + UNIT_ASSERT(IsValidYson(yson)); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfParse) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for 
(auto& i : v) { + UNIT_ASSERT(i = TryParseYsonDom(yson, &builder)); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfSerialize) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + const auto dom = TryParseYsonDom(yson, &builder); + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for (auto& i : v) { + UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToBinary(dom))); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfSerializeText) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + const auto dom = TryParseYsonDom(yson, &builder); + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for (auto& i : v) { + UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToText(dom))); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestPerfSerializePrettyText) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + const auto dom = TryParseYsonDom(yson, &builder); + std::array<NUdf::TUnboxedValue, Steps> v; + + const auto t = TInstant::Now(); + for (auto& i : v) { + UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToPrettyText(dom))); + } + const auto time = TInstant::Now() - t; + Cerr << "Time is " << time << Endl; + } + + Y_UNIT_TEST(TestSerializeJsonNanInf) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + 
NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + constexpr char yson[] = + R"( + { + "Nan" = %nan; + "Inf" = %inf; + "NegInf" = %-inf + } + )"; + + TString expected(R"({"Inf":"inf","Nan":"nan","NegInf":"-inf"})"); + + const auto dom = TryParseYsonDom(yson, &builder); + TString res = SerializeJsonDom(dom, false, true, true); + + UNIT_ASSERT_EQUAL(expected, res); + } +} diff --git a/yql/essentials/minikql/dom/ya.make b/yql/essentials/minikql/dom/ya.make new file mode 100644 index 0000000000..772eb55dc6 --- /dev/null +++ b/yql/essentials/minikql/dom/ya.make @@ -0,0 +1,26 @@ +LIBRARY() + +YQL_ABI_VERSION(2 28 0) + +PEERDIR( + library/cpp/containers/stack_vector + library/cpp/json + library/cpp/yson_pull + yql/essentials/public/udf + yql/essentials/utils +) + +SRCS( + node.cpp + json.cpp + yson.cpp + make.cpp + peel.cpp + hash.cpp +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/yql/essentials/minikql/dom/yson.cpp b/yql/essentials/minikql/dom/yson.cpp new file mode 100644 index 0000000000..f3ab30f22c --- /dev/null +++ b/yql/essentials/minikql/dom/yson.cpp @@ -0,0 +1,360 @@ +#include "node.h" +#include "yson.h" + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <library/cpp/yson_pull/exceptions.h> +#include <library/cpp/yson_pull/reader.h> +#include <library/cpp/yson_pull/writer.h> + +#include <util/string/builder.h> + +namespace NYql::NDom { + +using namespace NUdf; +using namespace NYsonPull; + +namespace { + +[[noreturn]] Y_NO_INLINE void UnexpectedEvent(EEventType ev) { + UdfTerminate((::TStringBuilder() << "Unexpected event: " << ev).c_str()); +} + +TUnboxedValuePod ParseScalar(const TScalar& scalar, const IValueBuilder* valueBuilder) { + switch (scalar.Type()) { + case EScalarType::Entity: + return MakeEntity(); + + case EScalarType::Boolean: + return MakeBool(scalar.AsBoolean()); + + case 
EScalarType::Int64: + return MakeInt64(scalar.AsInt64()); + + case EScalarType::UInt64: + return MakeUint64(scalar.AsUInt64()); + + case EScalarType::Float64: + return MakeDouble(scalar.AsFloat64()); + + case EScalarType::String: + return MakeString(scalar.AsString(), valueBuilder); + } +} + +TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder); +TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder); + +TUnboxedValue ParseList(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>> items; + for (;;) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + items.emplace_back(ParseList(reader, valueBuilder)); + break; + case EEventType::EndList: + return MakeList(items.data(), items.size(), valueBuilder); + case EEventType::BeginMap: + items.emplace_back(ParseDict(reader, valueBuilder)); + break; + case EEventType::BeginAttributes: + items.emplace_back(ParseAttributes(reader, valueBuilder)); + break; + case EEventType::Scalar: + items.emplace_back(ParseScalar(ev.AsScalar(), valueBuilder)); + break; + default: + UnexpectedEvent(ev.Type()); + } + } +} + +TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndMap) { + return MakeDict(items.data(), items.size()); + } + + Y_ASSERT(evKey.Type() == EEventType::Key); + auto key = valueBuilder->NewString(evKey.AsString()); + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); + break; + case EEventType::BeginMap: + items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); + break; + case EEventType::BeginAttributes: + 
items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); + break; + case EEventType::Scalar: + items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); + break; + default: + UnexpectedEvent(ev.Type()); + } + } +} + +TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder); + +TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndAttributes) { + break; + } + + Y_ASSERT(evKey.Type() == EEventType::Key); + auto key = valueBuilder->NewString(evKey.AsString()); + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); + break; + case EEventType::BeginMap: + items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); + break; + case EEventType::BeginAttributes: + items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); + break; + case EEventType::Scalar: + items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); + break; + default: + UnexpectedEvent(ev.Type()); + } + } + + return MakeAttr(ParseValue(reader, valueBuilder), items.data(), items.size()); +} + +TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + return ParseList(reader, valueBuilder); + case EEventType::BeginMap: + return ParseDict(reader, valueBuilder); + case EEventType::BeginAttributes: + return ParseAttributes(reader, valueBuilder); + case EEventType::Scalar: + return ParseScalar(ev.AsScalar(), valueBuilder); + default: + UnexpectedEvent(ev.Type()); + } +} + +///////////////////////////////////// + +bool 
CheckValue(TReader& reader); + +bool CheckDict(TReader& reader) { + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndMap) + return true; + + if (evKey.Type() != EEventType::Key) + return false; + + if (CheckValue(reader)) + continue; + else + return false; + } +} + +bool CheckAttributes(TReader& reader) { + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndAttributes) + break; + + if (evKey.Type() != EEventType::Key) + return false; + + if (CheckValue(reader)) + continue; + else + return false; + } + + return CheckValue(reader); +} + +bool CheckList(TReader& reader) { + for (;;) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + if (CheckList(reader)) + break; + else + return false; + case EEventType::BeginMap: + if (CheckDict(reader)) + break; + else + return false; + case EEventType::BeginAttributes: + if (CheckAttributes(reader)) + break; + else + return false; + case EEventType::Scalar: + break; + case EEventType::EndList: + return true; + default: + return false; + } + } +} + +bool CheckValue(TReader& reader) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + if (CheckList(reader)) + break; + else + return false; + case EEventType::BeginMap: + if (CheckDict(reader)) + break; + else + return false; + case EEventType::BeginAttributes: + if (CheckAttributes(reader)) + break; + else + return false; + case EEventType::Scalar: + break; + default: + return false; + } + return true; +} + +void WriteValue(TWriter& writer, const TUnboxedValue& x) { + switch (GetNodeType(x)) { + case ENodeType::String: + writer.String(x.AsStringRef()); + break; + case ENodeType::Bool: + writer.Boolean(x.Get<bool>()); + break; + case ENodeType::Int64: + writer.Int64(x.Get<i64>()); + break; + case ENodeType::Uint64: + writer.UInt64(x.Get<ui64>()); + break; + case ENodeType::Double: + writer.Float64(x.Get<double>()); + 
break; + case ENodeType::Entity: + writer.Entity(); + break; + case ENodeType::List: + writer.BeginList(); + if (x.IsBoxed()) { + if (const auto elements = x.GetElements()) { + const auto size = x.GetListLength(); + for (ui64 i = 0; i < size; ++i) { + WriteValue(writer, elements[i]); + } + } else { + const auto it = x.GetListIterator(); + for (TUnboxedValue v; it.Next(v); WriteValue(writer, v)) + continue; + } + } + writer.EndList(); + break; + case ENodeType::Dict: + writer.BeginMap(); + if (x.IsBoxed()) { + TUnboxedValue key, payload; + for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) { + writer.Key(key.AsStringRef()); + WriteValue(writer, payload); + } + } + writer.EndMap(); + break; + case ENodeType::Attr: { + writer.BeginAttributes(); + TUnboxedValue key, payload; + for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) { + writer.Key(key.AsStringRef()); + WriteValue(writer, payload); + } + + writer.EndAttributes(); + WriteValue(writer, x.GetVariantItem()); + } + break; + } +} + +void SerializeYsonDomImpl(const NUdf::TUnboxedValue& dom, TWriter& writer) { + writer.BeginStream(); + WriteValue(writer, dom); + writer.EndStream(); +} + +} + +NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder) { + auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); + const auto& begin = reader.NextEvent(); + Y_ASSERT(begin.Type() == EEventType::BeginStream); + auto value = ParseValue(reader, valueBuilder); + const auto& end = reader.NextEvent(); + Y_ASSERT(end.Type() == EEventType::EndStream); + return value; +} + +bool IsValidYson(const TStringBuf yson) try { + auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); + const auto& begin = reader.NextEvent(); + if (begin.Type() != EEventType::BeginStream) + return false; + if (!CheckValue(reader)) + return false; + const auto& end = reader.NextEvent(); + return end.Type() == EEventType::EndStream; +} catch (const 
NException::TBadStream&) { + return false; +} + +TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = MakeBinaryWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = MakeTextWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = MakePrettyTextWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +} diff --git a/yql/essentials/minikql/dom/yson.h b/yql/essentials/minikql/dom/yson.h new file mode 100644 index 0000000000..2fb6ac1ee3 --- /dev/null +++ b/yql/essentials/minikql/dom/yson.h @@ -0,0 +1,18 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +namespace NYql::NDom { + +bool IsValidYson(const TStringBuf yson); + +NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder); + +TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom); + +TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom); + +TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom); + +} diff --git a/yql/essentials/minikql/jsonpath/ast_builder.cpp b/yql/essentials/minikql/jsonpath/ast_builder.cpp new file mode 100644 index 0000000000..fadf003bfc --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ast_builder.cpp @@ -0,0 +1,499 @@ +#include "ast_builder.h" +#include "ast_nodes.h" +#include "parse_double.h" + +#include <yql/essentials/core/issue/protos/issue_id.pb.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h> +#include <yql/essentials/ast/yql_ast_escaping.h> + +#include 
<util/generic/singleton.h> +#include <util/system/compiler.h> +#include <util/string/cast.h> +#include <util/string/builder.h> +#include <util/charset/utf8.h> +#include <util/system/cpu_id.h> + +#include <cmath> + +using namespace NYql; +using namespace NYql::NJsonPath; +using namespace NJsonPathGenerated; +using namespace NReWrapper; + +namespace { + +constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID; + +TPosition GetPos(const TToken& token) { + return TPosition(token.GetColumn(), token.GetLine()); +} + +bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) { + result.clear(); + error.clear(); + + const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"'); + const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\''); + if (!doubleQuoted && !singleQuoted) { + error = "String must be quoted"; + return false; + } + if (singleQuoted && onlyDoubleQuoted) { + error = "Only double quoted strings allowed"; + return false; + } + + char quoteChar = doubleQuoted ? 
'"' : '\''; + size_t readBytes = 0; + TStringBuf atom(str); + atom.Skip(1); + TStringOutput sout(result); + result.reserve(str.size()); + + auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes); + + if (unescapeResult == EUnescapeResult::OK) { + return true; + } else { + error = UnescapeResultToString(unescapeResult); + return false; + } +} + +} + +TAstBuilder::TAstBuilder(TIssues& issues) + : Issues(issues) +{ +} + +void TAstBuilder::Error(TPosition pos, const TStringBuf message) { + Issues.AddIssue(pos, message); + Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); +} + +TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) { + TAstNodePtr from = BuildExpr(node.GetRule_expr1()); + TAstNodePtr to = nullptr; + if (node.HasBlock2()) { + to = BuildExpr(node.GetBlock2().GetRule_expr2()); + } + return {from, to}; +} + +TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) { + TVector<TArrayAccessNode::TSubscript> subscripts; + subscripts.reserve(1 + node.Block3Size()); + + subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2())); + for (size_t i = 0; i < node.Block3Size(); i++) { + subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2())); + } + + return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input); +} + +TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) { + return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input); +} + +TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) { + switch (node.GetAltCase()) { + case TRule_identifier::kAltIdentifier1: + return node.GetAlt_identifier1().GetToken1().GetValue(); + case TRule_identifier::kAltIdentifier2: + return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue(); + case TRule_identifier::ALT_NOT_SET: + 
Y_ABORT("Alternative for 'identifier' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) { + TString name; + const auto& nameBlock = node.GetBlock2(); + switch (nameBlock.GetAltCase()) { + case TRule_member_accessor_TBlock2::kAlt1: + name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1()); + break; + case TRule_member_accessor_TBlock2::kAlt2: { + const auto& token = nameBlock.GetAlt2().GetToken1(); + TString error; + if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) { + Error(GetPos(token), error); + return nullptr; + } + break; + } + case TRule_member_accessor_TBlock2::ALT_NOT_SET: + Y_ABORT("Alternative for 'member_accessor' rule is not set"); + } + + return new TMemberAccessNode(GetPos(node.GetToken1()), name, input); +} + +TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) { + const auto& token = node.GetToken2(); + return new TWildcardMemberAccessNode(GetPos(token), input); +} + +TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) { + const auto predicate = BuildExpr(node.GetRule_expr3()); + return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input); +} + +TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) { + const auto& token = node.GetToken2(); + const auto pos = GetPos(token); + const auto& value = token.GetValue(); + auto type = EMethodType::Double; + if (value == "abs") { + type = EMethodType::Abs; + } else if (value == "floor") { + type = EMethodType::Floor; + } else if (value == "ceiling") { + type = EMethodType::Ceiling; + } else if (value == "type") { + type = EMethodType::Type; + } else if (value == "size") { + type = EMethodType::Size; + } else if (value == "keyvalue") { + type = EMethodType::KeyValue; + } + + return new TMethodCallNode(pos, type, input); +} + +TAstNodePtr 
TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) { + switch (node.GetAltCase()) { + case TRule_accessor_op::kAltAccessorOp1: + return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp2: + return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp3: + return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp4: + return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp5: + return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input); + case TRule_accessor_op::kAltAccessorOp6: + return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input); + case TRule_accessor_op::ALT_NOT_SET: + Y_ABORT("Alternative for 'accessor_op' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) { + switch (node.GetAltCase()) { + case TRule_primary::kAltPrimary1: { + const auto& token = node.GetAlt_primary1().GetToken1(); + const auto& numberString = token.GetValue(); + const double parsedValue = ParseDouble(numberString); + if (Y_UNLIKELY(std::isnan(parsedValue))) { + Y_ABORT("Invalid number was allowed by JsonPath grammar"); + } + if (Y_UNLIKELY(std::isinf(parsedValue))) { + Error(GetPos(token), "Number literal is infinity"); + return nullptr; + } + return new TNumberLiteralNode(GetPos(token), parsedValue); + } + case TRule_primary::kAltPrimary2: { + const auto& token = node.GetAlt_primary2().GetToken1(); + return new TContextObjectNode(GetPos(token)); + } + case TRule_primary::kAltPrimary3: { + const auto& token = node.GetAlt_primary3().GetToken1(); + return new TLastArrayIndexNode(GetPos(token)); + } + case TRule_primary::kAltPrimary4: { + const auto& primary = 
node.GetAlt_primary4().GetBlock1(); + const auto input = BuildExpr(primary.GetRule_expr2()); + if (primary.HasBlock4()) { + const auto& token = primary.GetBlock4().GetToken1(); + return new TIsUnknownPredicateNode(GetPos(token), input); + } + return input; + } + case TRule_primary::kAltPrimary5: { + const auto& token = node.GetAlt_primary5().GetToken1(); + return new TVariableNode(GetPos(token), token.GetValue().substr(1)); + } + case TRule_primary::kAltPrimary6: { + const auto& token = node.GetAlt_primary6().GetToken1(); + return new TBooleanLiteralNode(GetPos(token), true); + } + case TRule_primary::kAltPrimary7: { + const auto& token = node.GetAlt_primary7().GetToken1(); + return new TBooleanLiteralNode(GetPos(token), false); + } + case TRule_primary::kAltPrimary8: { + const auto& token = node.GetAlt_primary8().GetToken1(); + return new TNullLiteralNode(GetPos(token)); + } + case TRule_primary::kAltPrimary9: { + const auto& token = node.GetAlt_primary9().GetToken1(); + TString value; + TString error; + if (!TryStringContent(token.GetValue(), value, error)) { + Error(GetPos(token), error); + return nullptr; + } + return new TStringLiteralNode(GetPos(token), value); + } + case TRule_primary::kAltPrimary10: { + const auto& token = node.GetAlt_primary10().GetToken1(); + return new TFilterObjectNode(GetPos(token)); + } + case TRule_primary::ALT_NOT_SET: + Y_ABORT("Alternative for 'primary' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) { + TAstNodePtr input = BuildPrimary(node.GetRule_primary1()); + for (size_t i = 0; i < node.Block2Size(); i++) { + input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input); + } + return input; +} + +TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) { + return BuildAccessorExpr(node.GetRule_accessor_expr1()); +} + +TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) { + const auto& regexToken = 
node.GetToken2(); + TString regex; + TString error; + if (!TryStringContent(regexToken.GetValue(), regex, error)) { + Error(GetPos(regexToken), error); + return nullptr; + } + + ui32 parsedFlags = 0; + if (node.HasBlock3()) { + TString flags; + const auto& flagsToken = node.GetBlock3().GetToken2(); + if (!TryStringContent(flagsToken.GetValue(), flags, error)) { + Error(GetPos(flagsToken), error); + return nullptr; + } + + for (char flag : flags) { + switch (flag) { + case 'i': + parsedFlags |= FLAGS_CASELESS; + break; + default: + Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'"); + break; + } + } + } + + IRePtr compiledRegex; + try { + compiledRegex = NDispatcher::Compile(regex, parsedFlags, RegexpLibId); + } catch (const NReWrapper::TCompileException& e) { + Error(GetPos(regexToken), e.AsStrBuf()); + return nullptr; + } + + return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex)); +} + +TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) { + switch (node.GetAltCase()) { + case TRule_predicate_expr::kAltPredicateExpr1: { + const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1(); + const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1()); + if (!predicate.HasBlock2()) { + return input; + } + + const auto& block = predicate.GetBlock2(); + switch (block.GetAltCase()) { + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: { + const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1(); + const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3()); + return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix); + } + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: { + return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input); + } + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET: + Y_ABORT("Alternative for inner block of 'predicate_expr' rule is not set"); + } + 
Y_UNREACHABLE(); + } + case TRule_predicate_expr::kAltPredicateExpr2: { + const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1(); + const auto input = BuildExpr(predicate.GetRule_expr3()); + return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input); + } + case TRule_predicate_expr::ALT_NOT_SET: + Y_ABORT("Alternative for 'predicate' rule is not set"); + } + Y_UNREACHABLE(); +} + +TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) { + const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2()); + if (!node.HasBlock1()) { + return predicateExpr; + } + + const auto& opToken = node.GetBlock1().GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EUnaryOperation::Plus; + if (opValue == "-") { + operation = EUnaryOperation::Minus; + } else if (opValue == "!") { + operation = EUnaryOperation::Not; + } + return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr); +} + +TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) { + TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Multiply; + if (opValue == "/") { + operation = EBinaryOperation::Divide; + } else if (opValue == "%") { + operation = EBinaryOperation::Modulo; + } + + const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) { + TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + auto operation = EBinaryOperation::Add; + if 
(opToken.GetValue() == "-") { + operation = EBinaryOperation::Substract; + } + + const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) { + TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1()); + + if (node.HasBlock2()) { + const auto& block = node.GetBlock2(); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Less; + if (opValue == "<=") { + operation = EBinaryOperation::LessEqual; + } else if (opValue == ">") { + operation = EBinaryOperation::Greater; + } else if (opValue == ">=") { + operation = EBinaryOperation::GreaterEqual; + } + + const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) { + TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1()); + + if (node.HasBlock2()) { + const auto& block = node.GetBlock2(); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Equal; + if (opValue == "<>" || opValue == "!=") { + operation = EBinaryOperation::NotEqual; + } + + const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) { + TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2()); + result = 
new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) { + TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) { + return BuildOrExpr(node.GetRule_or_expr1()); +} + +TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) { + TPosition pos; + auto mode = EJsonPathMode::Lax; + if (node.HasBlock1()) { + const auto& modeToken = node.GetBlock1().GetToken1(); + pos = GetPos(modeToken); + if (modeToken.GetValue() == "strict") { + mode = EJsonPathMode::Strict; + } + } + + const auto expr = BuildExpr(node.GetRule_expr2()); + return new TRootNode(pos, expr, mode); +} + +TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) { + return BuildJsonPath(ast.GetRule_jsonpath()); +} + +namespace NYql::NJsonPath { + +ui32 GetReLibId() { + return RegexpLibId; +} + +} diff --git a/yql/essentials/minikql/jsonpath/ast_builder.h b/yql/essentials/minikql/jsonpath/ast_builder.h new file mode 100644 index 0000000000..66a47483b3 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ast_builder.h @@ -0,0 +1,52 @@ +#pragma once + +#include "ast_nodes.h" + +#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h> + +namespace NYql::NJsonPath { + +class TAstBuilder { +public: + TAstBuilder(TIssues& issues); + + TAstNodePtr Build(const NJsonPathGenerated::TJsonPathParserAST& ast); + +private: + TArrayAccessNode::TSubscript BuildArraySubscript(const NJsonPathGenerated::TRule_array_subscript& node); + TAstNodePtr BuildArrayAccessor(const 
NJsonPathGenerated::TRule_array_accessor& node, TAstNodePtr input); + TAstNodePtr BuildWildcardArrayAccessor(const NJsonPathGenerated::TRule_wildcard_array_accessor& node, TAstNodePtr input); + + TString BuildIdentifier(const NJsonPathGenerated::TRule_identifier& node); + TAstNodePtr BuildMemberAccessor(const NJsonPathGenerated::TRule_member_accessor& node, TAstNodePtr input); + TAstNodePtr BuildWildcardMemberAccessor(const NJsonPathGenerated::TRule_wildcard_member_accessor& node, TAstNodePtr input); + + TAstNodePtr BuildFilter(const NJsonPathGenerated::TRule_filter& node, TAstNodePtr input); + + TAstNodePtr BuildMethod(const NJsonPathGenerated::TRule_method& node, TAstNodePtr input); + + TAstNodePtr BuildAccessorOp(const NJsonPathGenerated::TRule_accessor_op& node, TAstNodePtr input); + TAstNodePtr BuildAccessorExpr(const NJsonPathGenerated::TRule_accessor_expr& node); + + TAstNodePtr BuildPrimary(const NJsonPathGenerated::TRule_primary& node); + + TAstNodePtr BuildPlainExpr(const NJsonPathGenerated::TRule_plain_expr& node); + TAstNodePtr BuildLikeRegexExpr(const NJsonPathGenerated::TRule_like_regex_expr& node, TAstNodePtr input); + TAstNodePtr BuildPredicateExpr(const NJsonPathGenerated::TRule_predicate_expr& node); + TAstNodePtr BuildUnaryExpr(const NJsonPathGenerated::TRule_unary_expr& node); + TAstNodePtr BuildMulExpr(const NJsonPathGenerated::TRule_mul_expr& node); + TAstNodePtr BuildAddExpr(const NJsonPathGenerated::TRule_add_expr& node); + TAstNodePtr BuildCompareExpr(const NJsonPathGenerated::TRule_compare_expr& node); + TAstNodePtr BuildEqualExpr(const NJsonPathGenerated::TRule_equal_expr& node); + TAstNodePtr BuildAndExpr(const NJsonPathGenerated::TRule_and_expr& node); + TAstNodePtr BuildOrExpr(const NJsonPathGenerated::TRule_or_expr& node); + + TAstNodePtr BuildExpr(const NJsonPathGenerated::TRule_expr& node); + TAstNodePtr BuildJsonPath(const NJsonPathGenerated::TRule_jsonpath& node); + + void Error(TPosition pos, const TStringBuf message); + + 
TIssues& Issues; +}; + +} diff --git a/yql/essentials/minikql/jsonpath/ast_nodes.cpp b/yql/essentials/minikql/jsonpath/ast_nodes.cpp new file mode 100644 index 0000000000..5a51c2e90e --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ast_nodes.cpp @@ -0,0 +1,383 @@ +#include "ast_nodes.h" + +namespace NYql::NJsonPath { + +TAstNode::TAstNode(TPosition pos) + : Pos(pos) +{ +} + +TPosition TAstNode::GetPos() const { + return Pos; +} + +EReturnType TAstNode::GetReturnType() const { + return EReturnType::Any; +} + +TRootNode::TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode) + : TAstNode(pos) + , Expr(expr) + , Mode(mode) +{ +} + +const TAstNodePtr TRootNode::GetExpr() const { + return Expr; +} + +EJsonPathMode TRootNode::GetMode() const { + return Mode; +} + +void TRootNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitRoot(*this); +} + +EReturnType TRootNode::GetReturnType() const { + return Expr->GetReturnType(); +} + +TContextObjectNode::TContextObjectNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TContextObjectNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitContextObject(*this); +} + +TVariableNode::TVariableNode(TPosition pos, const TString& name) + : TAstNode(pos) + , Name(name) +{ +} + +const TString& TVariableNode::GetName() const { + return Name; +} + +void TVariableNode::Accept(IAstNodeVisitor& visitor) const { + visitor.VisitVariable(*this); +} + +TLastArrayIndexNode::TLastArrayIndexNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TLastArrayIndexNode::Accept(IAstNodeVisitor& visitor) const { + visitor.VisitLastArrayIndex(*this); +} + +TNumberLiteralNode::TNumberLiteralNode(TPosition pos, double value) + : TAstNode(pos) + , Value(value) +{ +} + +double TNumberLiteralNode::GetValue() const { + return Value; +} + +void TNumberLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitNumberLiteral(*this); +} + +TMemberAccessNode::TMemberAccessNode(TPosition pos, const TString& member, 
TAstNodePtr input) + : TAstNode(pos) + , Member(member) + , Input(input) +{ +} + +const TStringBuf TMemberAccessNode::GetMember() const { + return Member; +} + +const TAstNodePtr TMemberAccessNode::GetInput() const { + return Input; +} + +void TMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitMemberAccess(*this); +} + +TWildcardMemberAccessNode::TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TWildcardMemberAccessNode::GetInput() const { + return Input; +} + +void TWildcardMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitWildcardMemberAccess(*this); +} + +TArrayAccessNode::TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input) + : TAstNode(pos) + , Subscripts(subscripts) + , Input(input) +{ +} + +const TVector<TArrayAccessNode::TSubscript>& TArrayAccessNode::GetSubscripts() const { + return Subscripts; +} + +const TAstNodePtr TArrayAccessNode::GetInput() const { + return Input; +} + +void TArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitArrayAccess(*this); +} + +TWildcardArrayAccessNode::TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TWildcardArrayAccessNode::GetInput() const { + return Input; +} + +void TWildcardArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitWildcardArrayAccess(*this); +} + +TUnaryOperationNode::TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr) + : TAstNode(pos) + , Operation(op) + , Expr(expr) +{ +} + +EUnaryOperation TUnaryOperationNode::GetOp() const { + return Operation; +} + +const TAstNodePtr TUnaryOperationNode::GetExpr() const { + return Expr; +} + +void TUnaryOperationNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitUnaryOperation(*this); +} + +EReturnType TUnaryOperationNode::GetReturnType() 
const { + return Operation == EUnaryOperation::Not ? EReturnType::Bool : EReturnType::Any; +} + +TBinaryOperationNode::TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr) + : TAstNode(pos) + , Operation(op) + , LeftExpr(leftExpr) + , RightExpr(rightExpr) +{ +} + +EBinaryOperation TBinaryOperationNode::GetOp() const { + return Operation; +} + +const TAstNodePtr TBinaryOperationNode::GetLeftExpr() const { + return LeftExpr; +} + +const TAstNodePtr TBinaryOperationNode::GetRightExpr() const { + return RightExpr; +} + +void TBinaryOperationNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitBinaryOperation(*this); +} + +EReturnType TBinaryOperationNode::GetReturnType() const { + switch (Operation) { + case EBinaryOperation::Less: + case EBinaryOperation::LessEqual: + case EBinaryOperation::Greater: + case EBinaryOperation::GreaterEqual: + case EBinaryOperation::Equal: + case EBinaryOperation::NotEqual: + case EBinaryOperation::And: + case EBinaryOperation::Or: + return EReturnType::Bool; + + default: + return EReturnType::Any; + } +} + +TBooleanLiteralNode::TBooleanLiteralNode(TPosition pos, bool value) + : TAstNode(pos) + , Value(value) +{ +} + +bool TBooleanLiteralNode::GetValue() const { + return Value; +} + +void TBooleanLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitBooleanLiteral(*this); +} + +TNullLiteralNode::TNullLiteralNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TNullLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitNullLiteral(*this); +} + +TStringLiteralNode::TStringLiteralNode(TPosition pos, const TString& value) + : TAstNode(pos) + , Value(value) +{ +} + +const TString& TStringLiteralNode::GetValue() const { + return Value; +} + +void TStringLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitStringLiteral(*this); +} + +TFilterObjectNode::TFilterObjectNode(TPosition pos) + : TAstNode(pos) +{ +} + +void 
TFilterObjectNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitFilterObject(*this); +} + +TFilterPredicateNode::TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input) + : TAstNode(pos) + , Predicate(predicate) + , Input(input) +{ +} + +const TAstNodePtr TFilterPredicateNode::GetPredicate() const { + return Predicate; +} + +const TAstNodePtr TFilterPredicateNode::GetInput() const { + return Input; +} + +void TFilterPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitFilterPredicate(*this); +} + +TMethodCallNode::TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input) + : TAstNode(pos) + , Type(type) + , Input(input) +{ +} + +EMethodType TMethodCallNode::GetType() const { + return Type; +} + +const TAstNodePtr TMethodCallNode::GetInput() const { + return Input; +} + +void TMethodCallNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitMethodCall(*this); +} + +TStartsWithPredicateNode::TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix) + : TAstNode(pos) + , Input(input) + , Prefix(prefix) +{ +} + +const TAstNodePtr TStartsWithPredicateNode::GetInput() const { + return Input; +} + +const TAstNodePtr TStartsWithPredicateNode::GetPrefix() const { + return Prefix; +} + +EReturnType TStartsWithPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TStartsWithPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitStartsWithPredicate(*this); +} + +TExistsPredicateNode::TExistsPredicateNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TExistsPredicateNode::GetInput() const { + return Input; +} + +EReturnType TExistsPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TExistsPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitExistsPredicate(*this); +} + 
+TIsUnknownPredicateNode::TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TIsUnknownPredicateNode::GetInput() const { + return Input; +} + +EReturnType TIsUnknownPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TIsUnknownPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitIsUnknownPredicate(*this); +} + +TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex) + : TAstNode(pos) + , Input(input) + , Regex(std::move(regex)) +{ +} + +const TAstNodePtr TLikeRegexPredicateNode::GetInput() const { + return Input; +} + +const NReWrapper::IRePtr& TLikeRegexPredicateNode::GetRegex() const { + return Regex; +} + +EReturnType TLikeRegexPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TLikeRegexPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitLikeRegexPredicate(*this); +} + +} diff --git a/yql/essentials/minikql/jsonpath/ast_nodes.h b/yql/essentials/minikql/jsonpath/ast_nodes.h new file mode 100644 index 0000000000..6ccb8a56ea --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ast_nodes.h @@ -0,0 +1,401 @@ +#pragma once + +#include <yql/essentials/public/issue/yql_issue.h> + +#include <library/cpp/json/json_value.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/re.h> + +namespace NYql::NJsonPath { + +class TRootNode; +class TContextObjectNode; +class TVariableNode; +class TLastArrayIndexNode; +class TNumberLiteralNode; +class TAccessorExprNode; +class TMemberAccessNode; +class TWildcardMemberAccessNode; +class TArrayAccessNode; +class TWildcardArrayAccessNode; +class TUnaryOperationNode; +class TBinaryOperationNode; +class TBooleanLiteralNode; +class TNullLiteralNode; +class TStringLiteralNode; +class TFilterObjectNode; +class TFilterPredicateNode; +class TMethodCallNode; +class TStartsWithPredicateNode; +class TExistsPredicateNode; 
+class TIsUnknownPredicateNode; +class TLikeRegexPredicateNode; + +enum class EJsonPathMode { + Lax = 0, + Strict = 1, +}; + +class IAstNodeVisitor { +public: + virtual void VisitRoot(const TRootNode& node) = 0; + virtual void VisitContextObject(const TContextObjectNode& node) = 0; + virtual void VisitVariable(const TVariableNode& node) = 0; + virtual void VisitLastArrayIndex(const TLastArrayIndexNode& node) = 0; + virtual void VisitNumberLiteral(const TNumberLiteralNode& node) = 0; + virtual void VisitMemberAccess(const TMemberAccessNode& node) = 0; + virtual void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) = 0; + virtual void VisitArrayAccess(const TArrayAccessNode& node) = 0; + virtual void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) = 0; + virtual void VisitUnaryOperation(const TUnaryOperationNode& node) = 0; + virtual void VisitBinaryOperation(const TBinaryOperationNode& node) = 0; + virtual void VisitBooleanLiteral(const TBooleanLiteralNode& node) = 0; + virtual void VisitNullLiteral(const TNullLiteralNode& node) = 0; + virtual void VisitStringLiteral(const TStringLiteralNode& node) = 0; + virtual void VisitFilterObject(const TFilterObjectNode& node) = 0; + virtual void VisitFilterPredicate(const TFilterPredicateNode& node) = 0; + virtual void VisitMethodCall(const TMethodCallNode& node) = 0; + virtual void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) = 0; + virtual void VisitExistsPredicate(const TExistsPredicateNode& node) = 0; + virtual void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) = 0; + virtual void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) = 0; + + virtual ~IAstNodeVisitor() = default; +}; + +enum class EReturnType { + Any = 0, + Bool = 1, +}; + +class TAstNode : public TSimpleRefCount<TAstNode> { +public: + explicit TAstNode(TPosition pos); + + TPosition GetPos() const; + + virtual void Accept(IAstNodeVisitor& visitor) const = 0; + + virtual EReturnType 
GetReturnType() const; + + virtual ~TAstNode() = default; + +private: + TPosition Pos; +}; + +using TAstNodePtr = TIntrusivePtr<TAstNode>; + +class TRootNode : public TAstNode { +public: + TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode); + + const TAstNodePtr GetExpr() const; + + EJsonPathMode GetMode() const; + + void Accept(IAstNodeVisitor& visitor) const override; + + EReturnType GetReturnType() const override; + +private: + TAstNodePtr Expr; + EJsonPathMode Mode; +}; + +class TContextObjectNode : public TAstNode { +public: + explicit TContextObjectNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TVariableNode : public TAstNode { +public: + TVariableNode(TPosition pos, const TString& name); + + const TString& GetName() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TString Name; +}; + +class TLastArrayIndexNode : public TAstNode { +public: + explicit TLastArrayIndexNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TNumberLiteralNode : public TAstNode { +public: + TNumberLiteralNode(TPosition pos, double value); + + double GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + double Value; +}; + +class TMemberAccessNode : public TAstNode { +public: + TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input); + + const TStringBuf GetMember() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TString Member; + TAstNodePtr Input; +}; + +class TWildcardMemberAccessNode : public TAstNode { +public: + TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +class TArrayAccessNode : public TAstNode { +public: + struct TSubscript { + TAstNodePtr From; + TAstNodePtr To; + }; + + 
TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input); + + const TVector<TSubscript>& GetSubscripts() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TVector<TSubscript> Subscripts; + TAstNodePtr Input; +}; + +class TWildcardArrayAccessNode : public TAstNode { +public: + TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +enum class EUnaryOperation { + Plus = 0, + Minus = 1, + Not = 2, +}; + +class TUnaryOperationNode : public TAstNode { +public: + TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr); + + EUnaryOperation GetOp() const; + + const TAstNodePtr GetExpr() const; + + void Accept(IAstNodeVisitor& visitor) const override; + + EReturnType GetReturnType() const override; + +private: + EUnaryOperation Operation; + TAstNodePtr Expr; +}; + +enum class EBinaryOperation { + Add = 0, + Substract = 1, + Multiply = 2, + Divide = 3, + Modulo = 4, + Less = 5, + LessEqual = 6, + Greater = 7, + GreaterEqual = 8, + Equal = 9, + NotEqual = 10, + And = 11, + Or = 12, +}; + +class TBinaryOperationNode : public TAstNode { +public: + TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr); + + EBinaryOperation GetOp() const; + + const TAstNodePtr GetLeftExpr() const; + + const TAstNodePtr GetRightExpr() const; + + void Accept(IAstNodeVisitor& visitor) const override; + + EReturnType GetReturnType() const override; + +private: + EBinaryOperation Operation; + TAstNodePtr LeftExpr; + TAstNodePtr RightExpr; +}; + +class TBooleanLiteralNode : public TAstNode { +public: + TBooleanLiteralNode(TPosition pos, bool value); + + bool GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + bool Value; +}; + +class TNullLiteralNode : public TAstNode 
{ +public: + explicit TNullLiteralNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TStringLiteralNode : public TAstNode { +public: + TStringLiteralNode(TPosition pos, const TString& value); + + const TString& GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TString Value; +}; + +class TFilterObjectNode : public TAstNode { +public: + explicit TFilterObjectNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TFilterPredicateNode : public TAstNode { +public: + TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input); + + const TAstNodePtr GetPredicate() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Predicate; + TAstNodePtr Input; +}; + +enum class EMethodType { + Abs = 0, + Floor = 1, + Ceiling = 2, + Double = 3, + Type = 4, + Size = 5, + KeyValue = 6, +}; + +class TMethodCallNode : public TAstNode { +public: + TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input); + + EMethodType GetType() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + EMethodType Type; + TAstNodePtr Input; +}; + +class TStartsWithPredicateNode : public TAstNode { +public: + TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix); + + const TAstNodePtr GetInput() const; + + const TAstNodePtr GetPrefix() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; + TAstNodePtr Prefix; +}; + +class TExistsPredicateNode : public TAstNode { +public: + TExistsPredicateNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + 
+class TIsUnknownPredicateNode : public TAstNode { +public: + TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +class TLikeRegexPredicateNode : public TAstNode { +public: + TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex); + + const TAstNodePtr GetInput() const; + + const NReWrapper::IRePtr& GetRegex() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; + NReWrapper::IRePtr Regex; +}; + +} diff --git a/yql/essentials/minikql/jsonpath/benchmark/main.cpp b/yql/essentials/minikql/jsonpath/benchmark/main.cpp new file mode 100644 index 0000000000..456a09c399 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/benchmark/main.cpp @@ -0,0 +1,114 @@ +#include <yql/essentials/minikql/dom/json.h> +#include <yql/essentials/minikql/jsonpath/jsonpath.h> + +#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <contrib/ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> +#include <contrib/ydb/library/yql/minikql/mkql_mem_info.h> +#include <contrib/ydb/library/yql/minikql/mkql_function_registry.h> +#include <contrib/ydb/library/yql/minikql/mkql_alloc.h> +#include <contrib/ydb/library/yql/minikql/mkql_node.h> + +#include <library/cpp/json/json_value.h> +#include <library/cpp/testing/benchmark/bench.h> + +#include <util/random/fast.h> + +using namespace NJson; + +using namespace NYql; +using namespace NYql::NDom; +using namespace NYql::NUdf; +using namespace NYql::NJsonPath; +using namespace NJson; +using namespace NKikimr::NMiniKQL; + +TString RandomString(ui32 min, ui32 max) { + static TReallyFastRng32 rand(0); + TString result; + const 
ui32 length = rand.Uniform(min, max + 1); + result.reserve(length); + for (ui32 i = 0; i < length; ++i) { + result.push_back(char(rand.Uniform('a', 'z' + 1))); + } + return result; +} + +TString RandomString(ui32 length) { + return RandomString(length, length); +} + +TString GenerateRandomJson() { + TJsonMap result; + TJsonMap id; + id.InsertValue("id", TJsonValue(RandomString(24))); + id.InsertValue("issueId", TJsonValue(RandomString(24))); + result.InsertValue("_id", std::move(id)); + result.InsertValue("@class", TJsonValue(RandomString(60))); + result.InsertValue("author", TJsonValue(RandomString(10))); + result.InsertValue("transitionId", TJsonValue(RandomString(24))); + TJsonArray comments; + for (ui32 i = 0; i < 30; i++) { + TJsonMap comment; + comment.InsertValue("id", TJsonValue(RandomString(24))); + comment.InsertValue("newText", TJsonValue(RandomString(150))); + comments.AppendValue(std::move(comment)); + } + TJsonMap changes; + changes.InsertValue("comment", std::move(comments)); + result.InsertValue("changes", std::move(changes)); + return result.GetStringRobust(); +} + +const size_t MAX_PARSE_ERRORS = 100; + +#define PREPARE() \ + TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())); \ + TScopedAlloc Alloc(__LOCATION__); \ + TTypeEnvironment Env(Alloc); \ + TMemoryUsageInfo MemInfo("Memory"); \ + THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()); \ + TDefaultValueBuilder ValueBuilder(HolderFactory); \ + + +Y_CPU_BENCHMARK(JsonPath, iface) { + PREPARE() + + const TString json = GenerateRandomJson(); + const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder); + + for (size_t i = 0; i < iface.Iterations(); i++) { + TIssues issues; + const auto jsonPath = ParseJsonPath("$.'_id'.issueId", issues, MAX_PARSE_ERRORS); + const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder); + Y_ABORT_UNLESS(!result.IsError()); + } +} + 
+Y_CPU_BENCHMARK(JsonPathLikeRegexWithCompile, iface) { + PREPARE() + + const TString json = GenerateRandomJson(); + const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder); + + for (size_t i = 0; i < iface.Iterations(); i++) { + TIssues issues; + const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS); + const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder); + Y_ABORT_UNLESS(!result.IsError()); + } +} + +Y_CPU_BENCHMARK(JsonPathLikeRegex, iface) { + PREPARE() + + const TString json = GenerateRandomJson(); + const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder); + + TIssues issues; + const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS); + for (size_t i = 0; i < iface.Iterations(); i++) { + const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder); + Y_ABORT_UNLESS(!result.IsError()); + } +} diff --git a/yql/essentials/minikql/jsonpath/benchmark/ya.make b/yql/essentials/minikql/jsonpath/benchmark/ya.make new file mode 100644 index 0000000000..b26163510f --- /dev/null +++ b/yql/essentials/minikql/jsonpath/benchmark/ya.make @@ -0,0 +1,19 @@ +Y_BENCHMARK(jsonpath-benchmark) + +PEERDIR( + library/cpp/json + yql/essentials/minikql/dom + contrib/ydb/library/yql/minikql/invoke_builtins/llvm14 + yql/essentials/minikql/jsonpath + yql/essentials/public/issue + yql/essentials/public/udf/service/exception_policy + contrib/ydb/library/yql/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +SRCS( + main.cpp +) + +END() diff --git a/yql/essentials/minikql/jsonpath/binary.cpp b/yql/essentials/minikql/jsonpath/binary.cpp new file mode 100644 index 0000000000..8d75a6d3b9 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/binary.cpp @@ -0,0 +1,604 @@ +#include "binary.h" + +#include <yql/essentials/utils/yql_panic.h> + +namespace NYql::NJsonPath { + +bool TArraySubscriptOffsets::IsRange() const { + return ToOffset > 0; +} + +const 
TStringBuf TJsonPathItem::GetString() const { + return std::get<TStringBuf>(Data); +} + +const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const { + return std::get<TVector<TArraySubscriptOffsets>>(Data); +} + +const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const { + return std::get<TBinaryOpArgumentsOffset>(Data); +} + +double TJsonPathItem::GetNumber() const { + return std::get<double>(Data); +} + +bool TJsonPathItem::GetBoolean() const { + return std::get<bool>(Data); +} + +TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const { + return std::get<TFilterPredicateOffset>(Data); +} + +TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const { + return std::get<TStartsWithPrefixOffset>(Data); +} + +const NReWrapper::IRePtr& TJsonPathItem::GetRegex() const { + return std::get<NReWrapper::IRePtr>(Data); +} + +TJsonPathReader::TJsonPathReader(const TJsonPathPtr path) + : Path(path) + , InitialPos(0) + , Mode(ReadMode(InitialPos)) +{ +} + +const TJsonPathItem& TJsonPathReader::ReadFirst() { + return ReadFromPos(InitialPos); +} + +const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) { + YQL_ENSURE(item.InputItemOffset.Defined()); + return ReadFromPos(*item.InputItemOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) { + return ReadFromPos(subscript.FromOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) { + YQL_ENSURE(subscript.IsRange()); + return ReadFromPos(subscript.ToOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) { + return ReadFromPos(node.GetBinaryOpArguments().LeftOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) { + return ReadFromPos(node.GetBinaryOpArguments().RightOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const 
TJsonPathItem& node) { + return ReadFromPos(node.GetFilterPredicateOffset().Offset); +} + +const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) { + return ReadFromPos(node.GetStartsWithPrefixOffset().Offset); +} + +EJsonPathMode TJsonPathReader::GetMode() const { + return Mode; +} + +const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) { + YQL_ENSURE(pos < Path->Size()); + + const auto it = ItemCache.find(pos); + if (it != ItemCache.end()) { + return it->second; + } + + TJsonPathItem& result = ItemCache[pos]; + result.Type = ReadType(pos); + + const auto row = ReadUint(pos); + const auto column = ReadUint(pos); + result.Pos = TPosition(column, row, "jsonpath"); + + switch (result.Type) { + // Items without input + case EJsonPathItemType::FilterObject: + case EJsonPathItemType::NullLiteral: + case EJsonPathItemType::ContextObject: + case EJsonPathItemType::LastArrayIndex: + break; + + case EJsonPathItemType::Variable: + case EJsonPathItemType::StringLiteral: + result.Data = ReadString(pos); + break; + + case EJsonPathItemType::NumberLiteral: + result.Data = ReadDouble(pos); + break; + + case EJsonPathItemType::BooleanLiteral: + result.Data = ReadBool(pos); + break; + + // Items with single input + case EJsonPathItemType::TypeMethod: + case EJsonPathItemType::SizeMethod: + case EJsonPathItemType::KeyValueMethod: + case EJsonPathItemType::AbsMethod: + case EJsonPathItemType::FloorMethod: + case EJsonPathItemType::CeilingMethod: + case EJsonPathItemType::DoubleMethod: + case EJsonPathItemType::WildcardArrayAccess: + case EJsonPathItemType::WildcardMemberAccess: + case EJsonPathItemType::UnaryMinus: + case EJsonPathItemType::UnaryPlus: + case EJsonPathItemType::UnaryNot: + case EJsonPathItemType::IsUnknownPredicate: + case EJsonPathItemType::ExistsPredicate: + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::MemberAccess: + result.Data = ReadString(pos); + result.InputItemOffset = ReadUint(pos); + break; + + 
case EJsonPathItemType::ArrayAccess: + result.Data = ReadSubscripts(pos); + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::FilterPredicate: + result.Data = TFilterPredicateOffset{ReadUint(pos)}; + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::StartsWithPredicate: + result.Data = TStartsWithPrefixOffset{ReadUint(pos)}; + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::LikeRegexPredicate: { + const auto serializedRegex = ReadString(pos); + + auto regex = NReWrapper::NDispatcher::Deserialize(serializedRegex); + result.Data = std::move(regex); + result.InputItemOffset = ReadUint(pos); + break; + } + + // Items with 2 inputs + case EJsonPathItemType::BinaryAdd: + case EJsonPathItemType::BinarySubstract: + case EJsonPathItemType::BinaryMultiply: + case EJsonPathItemType::BinaryDivide: + case EJsonPathItemType::BinaryModulo: + case EJsonPathItemType::BinaryLess: + case EJsonPathItemType::BinaryLessEqual: + case EJsonPathItemType::BinaryGreater: + case EJsonPathItemType::BinaryGreaterEqual: + case EJsonPathItemType::BinaryEqual: + case EJsonPathItemType::BinaryNotEqual: + case EJsonPathItemType::BinaryAnd: + case EJsonPathItemType::BinaryOr: + TBinaryOpArgumentsOffset data; + data.LeftOffset = ReadUint(pos); + data.RightOffset = ReadUint(pos); + result.Data = data; + break; + } + + return result; +} + +TUint TJsonPathReader::ReadUint(TUint& pos) { + return ReadPOD<TUint>(pos); +} + +double TJsonPathReader::ReadDouble(TUint& pos) { + return ReadPOD<double>(pos); +} + +bool TJsonPathReader::ReadBool(TUint& pos) { + return ReadPOD<bool>(pos); +} + +EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) { + return static_cast<EJsonPathItemType>(ReadUint(pos)); +} + +EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) { + return static_cast<EJsonPathMode>(ReadUint(pos)); +} + +const TStringBuf TJsonPathReader::ReadString(TUint& pos) { + TUint length = ReadUint(pos); + TStringBuf 
result(Path->Begin() + pos, length); + pos += length; + return result; +} + +TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) { + const auto count = ReadUint(pos); + TVector<TArraySubscriptOffsets> result(count); + + for (size_t i = 0; i < count; i++) { + result[i].FromOffset = ReadUint(pos); + result[i].ToOffset = ReadUint(pos); + } + return result; +} + +void TJsonPathBuilder::VisitRoot(const TRootNode& node) { + // Block structure: + // <(1) TUint> + // Components: + // (1) Must be casted to EJsonPathMode. Jsonpath execution mode + WriteMode(node.GetMode()); + node.GetExpr()->Accept(*this); +} + +void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) { + WriteZeroInputItem(EJsonPathItemType::ContextObject, node); +} + +void TJsonPathBuilder::VisitVariable(const TVariableNode& node) { + WriteZeroInputItem(EJsonPathItemType::Variable, node); + WriteString(node.GetName()); +} + +void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) { + WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node); +} + +void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node); + WriteDouble(node.GetValue()); +} + +void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> + // Components: + // (1) Must be casted to EJsonPathItemType. 
Member access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Length of member name string + // (5) Member name string + // (6) Offset of the input item + WriteType(EJsonPathItemType::MemberAccess); + WritePos(node); + WriteString(node.GetMember()); + + WriteNextPosition(); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { + WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items> + // Components: + // (1) Must be casted to EJsonPathItemType. Array access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Count of subscripts stored + // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element + // is set to it's offset and second is zero. If subscript is a range, both pair elements are valid offsets + // to the elements of range (lower and upper bound). + // (6) Offset of the input item + // (7) Array of subcsripts. For details about encoding see VisitArraySubscript + WriteType(EJsonPathItemType::ArrayAccess); + WritePos(node); + + // (4) Write count of subscripts stored + const auto& subscripts = node.GetSubscripts(); + const auto count = subscripts.size(); + WriteUint(count); + + // (5) We do not know sizes of each subscript. 
Write array of zeros for offsets + const auto indexStart = CurrentEndPos(); + TVector<TUint> offsets(2 * count); + WriteUintSequence(offsets); + + // (6) Reserve space for input offset to rewrite it later + const auto inputStart = CurrentEndPos(); + WriteFinishPosition(); + + // (7) Write all subscripts and record offset for each of them + for (size_t i = 0; i < count; i++) { + offsets[2 * i] = CurrentEndPos(); + subscripts[i].From->Accept(*this); + + if (subscripts[i].To) { + offsets[2 * i + 1] = CurrentEndPos(); + subscripts[i].To->Accept(*this); + } + } + + // (5) Rewrite offsets with correct values + RewriteUintSequence(offsets, indexStart); + + // (6) Rewrite input offset + RewriteUint(CurrentEndPos(), inputStart); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { + WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) { + EJsonPathItemType type; + switch (node.GetOp()) { + case EUnaryOperation::Plus: + type = EJsonPathItemType::UnaryPlus; + break; + case EUnaryOperation::Minus: + type = EJsonPathItemType::UnaryMinus; + break; + case EUnaryOperation::Not: + type = EJsonPathItemType::UnaryNot; + break; + } + + WriteSingleInputItem(type, node, node.GetExpr()); +} + +void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) { + EJsonPathItemType type; + switch (node.GetOp()) { + case EBinaryOperation::Add: + type = EJsonPathItemType::BinaryAdd; + break; + case EBinaryOperation::Substract: + type = EJsonPathItemType::BinarySubstract; + break; + case EBinaryOperation::Multiply: + type = EJsonPathItemType::BinaryMultiply; + break; + case EBinaryOperation::Divide: + type = EJsonPathItemType::BinaryDivide; + break; + case EBinaryOperation::Modulo: + type = EJsonPathItemType::BinaryModulo; + break; + case EBinaryOperation::Less: + type = 
EJsonPathItemType::BinaryLess; + break; + case EBinaryOperation::LessEqual: + type = EJsonPathItemType::BinaryLessEqual; + break; + case EBinaryOperation::Greater: + type = EJsonPathItemType::BinaryGreater; + break; + case EBinaryOperation::GreaterEqual: + type = EJsonPathItemType::BinaryGreaterEqual; + break; + case EBinaryOperation::Equal: + type = EJsonPathItemType::BinaryEqual; + break; + case EBinaryOperation::NotEqual: + type = EJsonPathItemType::BinaryNotEqual; + break; + case EBinaryOperation::And: + type = EJsonPathItemType::BinaryAnd; + break; + case EBinaryOperation::Or: + type = EJsonPathItemType::BinaryOr; + break; + } + + WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr()); +} + +void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node); + WriteBool(node.GetValue()); +} + +void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::NullLiteral, node); +} + +void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::StringLiteral, node); + WriteString(node.GetValue()); +} + +void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) { + WriteZeroInputItem(EJsonPathItemType::FilterObject, node); +} + +void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) { + WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput()); +} + +void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) { + EJsonPathItemType type; + switch (node.GetType()) { + case EMethodType::Abs: + type = EJsonPathItemType::AbsMethod; + break; + case EMethodType::Floor: + type = EJsonPathItemType::FloorMethod; + break; + case EMethodType::Ceiling: + type = EJsonPathItemType::CeilingMethod; + break; + case EMethodType::Double: + type = EJsonPathItemType::DoubleMethod; + break; + case 
EMethodType::Type: + type = EJsonPathItemType::TypeMethod; + break; + case EMethodType::Size: + type = EJsonPathItemType::SizeMethod; + break; + case EMethodType::KeyValue: + type = EJsonPathItemType::KeyValueMethod; + break; + } + + WriteSingleInputItem(type, node, node.GetInput()); +} + +TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() { + Result->ShrinkToFit(); + return Result; +} + +void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { + WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput()); +} + +void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) { + WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { + WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> + // Components: + // (1) Must be casted to EJsonPathItemType. 
Member access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Length of serialized Hyperscan database + // (5) Serialized Hyperscan database + // (6) Offset of the input item + WriteType(EJsonPathItemType::LikeRegexPredicate); + WritePos(node); + + const TString serializedRegex = node.GetRegex()->Serialize(); + WriteString(serializedRegex); + + WriteNextPosition(); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + WriteType(type); + WritePos(node); +} + +void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Offset of the input item + // (5) Input item + WriteZeroInputItem(type, node); + + WriteNextPosition(); + input->Accept(*this); +} + +void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Offset of the first input + // (5) Offset of the second input + // (6) JsonPath item representing first input + // (7) JsonPath item representing right input + WriteZeroInputItem(type, node); + + // (4) and (5) Fill offsets with zeros + const auto indexStart = 
CurrentEndPos(); + WriteUint(0); + WriteUint(0); + + // (6) Write first input and record it's offset + const auto firstInputStart = CurrentEndPos(); + firstInput->Accept(*this); + + // (7) Write second input and record it's offset + const auto secondInputStart = CurrentEndPos(); + secondInput->Accept(*this); + + // (4) and (5) Rewrite offsets with correct values + RewriteUintSequence({firstInputStart, secondInputStart}, indexStart); +} + +void TJsonPathBuilder::WritePos(const TAstNode& node) { + WriteUint(node.GetPos().Row); + WriteUint(node.GetPos().Column); +} + +void TJsonPathBuilder::WriteType(EJsonPathItemType type) { + WriteUint(static_cast<TUint>(type)); +} + +void TJsonPathBuilder::WriteMode(EJsonPathMode mode) { + WriteUint(static_cast<TUint>(mode)); +} + +void TJsonPathBuilder::WriteNextPosition() { + WriteUint(CurrentEndPos() + sizeof(TUint)); +} + +void TJsonPathBuilder::WriteFinishPosition() { + WriteUint(0); +} + +void TJsonPathBuilder::WriteString(TStringBuf value) { + WriteUint(value.size()); + Result->Append(value.data(), value.size()); +} + +void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) { + const auto length = sequence.size() * sizeof(TUint); + Y_ASSERT(offset + length < CurrentEndPos()); + + MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length); +} + +void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) { + const auto length = sequence.size() * sizeof(TUint); + Result->Append(reinterpret_cast<const char*>(sequence.data()), length); +} + +void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) { + Y_ASSERT(offset + sizeof(TUint) < CurrentEndPos()); + + MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint)); +} + +void TJsonPathBuilder::WriteUint(TUint value) { + WritePOD(value); +} + +void TJsonPathBuilder::WriteDouble(double value) { + WritePOD(value); +} + +void TJsonPathBuilder::WriteBool(bool value) { + 
WritePOD(value); +} + +TUint TJsonPathBuilder::CurrentEndPos() const { + return Result->Size(); +} + + +} diff --git a/yql/essentials/minikql/jsonpath/binary.h b/yql/essentials/minikql/jsonpath/binary.h new file mode 100644 index 0000000000..7ce2626152 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/binary.h @@ -0,0 +1,275 @@ +#pragma once + +#include "ast_nodes.h" + +#include <yql/essentials/minikql/jsonpath/rewrapper/re.h> + +#include <util/system/unaligned_mem.h> +#include <util/generic/buffer.h> +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> +#include <util/generic/hash.h> + +#include <variant> +#include <type_traits> + +namespace NYql::NJsonPath { + +class TJsonPath : public TSimpleRefCount<TJsonPath>, public TBuffer { +}; + +using TJsonPathPtr = TIntrusivePtr<TJsonPath>; +using TUint = ui64; + +enum class EJsonPathItemType { + MemberAccess = 0, + WildcardMemberAccess = 1, + ArrayAccess = 2, + WildcardArrayAccess = 3, + ContextObject = 4, + NumberLiteral = 5, + LastArrayIndex = 6, + UnaryPlus = 7, + UnaryMinus = 8, + BinaryAdd = 9, + BinarySubstract = 10, + BinaryMultiply = 11, + BinaryDivide = 12, + BinaryModulo = 13, + Variable = 14, + BinaryLess = 15, + BinaryLessEqual = 16, + BinaryGreater = 17, + BinaryGreaterEqual = 18, + BinaryEqual = 19, + BinaryNotEqual = 20, + BinaryAnd = 21, + BinaryOr = 22, + UnaryNot = 23, + BooleanLiteral = 24, + NullLiteral = 25, + StringLiteral = 26, + FilterObject = 27, + FilterPredicate = 28, + AbsMethod = 29, + FloorMethod = 30, + CeilingMethod = 31, + DoubleMethod = 32, + TypeMethod = 33, + SizeMethod = 34, + KeyValueMethod = 35, + StartsWithPredicate = 36, + ExistsPredicate = 37, + IsUnknownPredicate = 38, + LikeRegexPredicate = 39, +}; + +struct TArraySubscriptOffsets { + TUint FromOffset = 0; + TUint ToOffset = 0; + + bool IsRange() const; +}; + +struct TBinaryOpArgumentsOffset { + TUint LeftOffset = 0; + TUint RightOffset = 0; +}; + +struct TFilterPredicateOffset { + TUint Offset = 0; +}; + +struct 
TStartsWithPrefixOffset { + TUint Offset = 0; +}; + +struct TJsonPathItem { + // Position in the source jsonpath + TPosition Pos; + + // Type of item + EJsonPathItemType Type; + + // Offset in buffer pointing to the input item + TMaybe<TUint> InputItemOffset; + + // Data associated with this item. To determine which variant + // type was filled callee must examine Type field. + // WARNING: Some item types do not fill Data field at all! You must + // check item type before accesing this field. + std::variant< + TStringBuf, + TVector<TArraySubscriptOffsets>, + TBinaryOpArgumentsOffset, + TFilterPredicateOffset, + TStartsWithPrefixOffset, + NReWrapper::IRePtr, + double, + bool + > Data; + + const TStringBuf GetString() const; + const TVector<TArraySubscriptOffsets>& GetSubscripts() const; + const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const; + const NReWrapper::IRePtr& GetRegex() const; + double GetNumber() const; + bool GetBoolean() const; + TFilterPredicateOffset GetFilterPredicateOffset() const; + TStartsWithPrefixOffset GetStartsWithPrefixOffset() const; + + // Pointer to the binary representation of jsonpath. + // We do not use this directly but Data field can reference to it. + // For example if this item is a string then Data contains TStringBuf + // pointing to some part inside buffer. We must ensure that it is not + // destructed while this item is alive so we keep shared pointer to it. 
+ const TJsonPathPtr JsonPath; +}; + +class TJsonPathBuilder : public IAstNodeVisitor { +public: + TJsonPathBuilder() + : Result(new TJsonPath()) + { + } + + void VisitRoot(const TRootNode& node) override; + + void VisitContextObject(const TContextObjectNode& node) override; + + void VisitVariable(const TVariableNode& node) override; + + void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; + + void VisitNumberLiteral(const TNumberLiteralNode& node) override; + + void VisitMemberAccess(const TMemberAccessNode& node) override; + + void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; + + void VisitArrayAccess(const TArrayAccessNode& node) override; + + void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; + + void VisitUnaryOperation(const TUnaryOperationNode& node) override; + + void VisitBinaryOperation(const TBinaryOperationNode& node) override; + + void VisitBooleanLiteral(const TBooleanLiteralNode& node) override; + + void VisitNullLiteral(const TNullLiteralNode& node) override; + + void VisitStringLiteral(const TStringLiteralNode& node) override; + + void VisitFilterObject(const TFilterObjectNode& node) override; + + void VisitFilterPredicate(const TFilterPredicateNode& node) override; + + void VisitMethodCall(const TMethodCallNode& node) override; + + void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override; + + void VisitExistsPredicate(const TExistsPredicateNode& node) override; + + void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override; + + void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override; + + TJsonPathPtr ShrinkAndGetResult(); + +private: + void WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node); + + void WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input); + + void WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const 
TAstNodePtr secondInput); + + void WritePos(const TAstNode& node); + + void WriteType(EJsonPathItemType type); + + void WriteMode(EJsonPathMode mode); + + void WriteNextPosition(); + + void WriteFinishPosition(); + + void WriteString(TStringBuf value); + + void RewriteUintSequence(const TVector<TUint>& sequence, TUint offset); + + void WriteUintSequence(const TVector<TUint>& sequence); + + void RewriteUint(TUint value, TUint offset); + + void WriteUint(TUint value); + + void WriteDouble(double value); + + void WriteBool(bool value); + + template <typename T> + void WritePOD(const T& value) { + static_assert(std::is_pod_v<T>, "Type must be POD"); + Result->Append(reinterpret_cast<const char*>(&value), sizeof(T)); + } + + TUint CurrentEndPos() const; + + TJsonPathPtr Result; +}; + +class TJsonPathReader { +public: + TJsonPathReader(const TJsonPathPtr path); + + const TJsonPathItem& ReadFirst(); + + const TJsonPathItem& ReadInput(const TJsonPathItem& node); + + const TJsonPathItem& ReadFromSubscript(const TArraySubscriptOffsets& subscript); + + const TJsonPathItem& ReadToSubscript(const TArraySubscriptOffsets& subscript); + + const TJsonPathItem& ReadLeftOperand(const TJsonPathItem& node); + + const TJsonPathItem& ReadRightOperand(const TJsonPathItem& node); + + const TJsonPathItem& ReadFilterPredicate(const TJsonPathItem& node); + + const TJsonPathItem& ReadPrefix(const TJsonPathItem& node); + + EJsonPathMode GetMode() const; + +private: + const TJsonPathItem& ReadFromPos(TUint pos); + + TUint ReadUint(TUint& pos); + + double ReadDouble(TUint& pos); + + bool ReadBool(TUint& pos); + + EJsonPathItemType ReadType(TUint& pos); + + EJsonPathMode ReadMode(TUint& pos); + + const TStringBuf ReadString(TUint& pos); + + TVector<TArraySubscriptOffsets> ReadSubscripts(TUint& pos); + + template <typename T> + T ReadPOD(TUint& pos) { + static_assert(std::is_pod_v<T>, "Type must be POD"); + T value = ReadUnaligned<T>(Path->Begin() + pos); + pos += sizeof(T); + return 
std::move(value); + } + + const TJsonPathPtr Path; + TUint InitialPos; + EJsonPathMode Mode; + THashMap<TUint, TJsonPathItem> ItemCache; +}; + +} diff --git a/yql/essentials/minikql/jsonpath/executor.cpp b/yql/essentials/minikql/jsonpath/executor.cpp new file mode 100644 index 0000000000..db2ea213f3 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/executor.cpp @@ -0,0 +1,1064 @@ +#include "executor.h" +#include "parse_double.h" + +#include <yql/essentials/core/issue/protos/issue_id.pb.h> +#include <yql/essentials/minikql/dom/node.h> + +#include <util/generic/scope.h> +#include <util/generic/maybe.h> +#include <util/system/compiler.h> + +#include <cmath> + +namespace NYql::NJsonPath { + +using namespace NJson; +using namespace NUdf; +using namespace NDom; + +namespace { + +bool IsObjectOrArray(const TValue& value) { + return value.IsArray() || value.IsObject(); +} + +TIssue MakeError(TPosition pos, TIssueCode code, const TStringBuf message) { + TIssue error(pos, message); + error.SetCode(code, TSeverityIds::S_ERROR); + return error; +} + +TIssue MakeError(const TJsonPathItem& item, TIssueCode code, const TStringBuf message) { + return MakeError(item.Pos, code, message); +} + +} + +TResult::TResult(TJsonNodes&& nodes) + : Result(std::move(nodes)) +{ +} + +TResult::TResult(const TJsonNodes& nodes) + : Result(nodes) +{ +} + +TResult::TResult(TIssue&& issue) + : Result(std::move(issue)) +{ +} + +const TJsonNodes& TResult::GetNodes() const { + return std::get<TJsonNodes>(Result); +} + +TJsonNodes& TResult::GetNodes() { + return std::get<TJsonNodes>(Result); +} + +const TIssue& TResult::GetError() const { + return std::get<TIssue>(Result); +} + +bool TResult::IsError() const { + return std::holds_alternative<TIssue>(Result); +} + +TExecutor::TExecutor( + const TJsonPathPtr path, + const TJsonNodes& input, + const TVariablesMap& variables, + const IValueBuilder* valueBuilder) + : Reader(path) + , Input(input) + , Variables(variables) + , ValueBuilder(valueBuilder) +{ +} 
+ +bool TExecutor::IsZero(double value) { + return -EPSILON <= value && value <= EPSILON; +} + +bool TExecutor::IsLess(double a, double b) { + return (b - a) > EPSILON; +} + +bool TExecutor::IsGreater(double a, double b) { + return (a - b) > EPSILON; +} + +bool TExecutor::IsEqual(double a, double b) { + return IsZero(a - b); +} + +bool TExecutor::IsStrict() const { + return Reader.GetMode() == EJsonPathMode::Strict; +} + +bool TExecutor::IsLax() const { + return Reader.GetMode() == EJsonPathMode::Lax; +} + +TResult TExecutor::Execute() { + return Execute(Reader.ReadFirst()); +} + +TResult TExecutor::Execute(const TJsonPathItem& item) { + switch (item.Type) { + case EJsonPathItemType::MemberAccess: + return MemberAccess(item); + case EJsonPathItemType::WildcardMemberAccess: + return WildcardMemberAccess(item); + case EJsonPathItemType::ContextObject: + return ContextObject(); + case EJsonPathItemType::Variable: + return Variable(item); + case EJsonPathItemType::NumberLiteral: + return NumberLiteral(item); + case EJsonPathItemType::ArrayAccess: + return ArrayAccess(item); + case EJsonPathItemType::WildcardArrayAccess: + return WildcardArrayAccess(item); + case EJsonPathItemType::LastArrayIndex: + return LastArrayIndex(item); + case EJsonPathItemType::UnaryMinus: + case EJsonPathItemType::UnaryPlus: + return UnaryArithmeticOp(item); + case EJsonPathItemType::BinaryAdd: + case EJsonPathItemType::BinarySubstract: + case EJsonPathItemType::BinaryMultiply: + case EJsonPathItemType::BinaryDivide: + case EJsonPathItemType::BinaryModulo: + return BinaryArithmeticOp(item); + case EJsonPathItemType::BinaryAnd: + case EJsonPathItemType::BinaryOr: + return BinaryLogicalOp(item); + case EJsonPathItemType::UnaryNot: + return UnaryLogicalOp(item); + case EJsonPathItemType::BooleanLiteral: + return BooleanLiteral(item); + case EJsonPathItemType::NullLiteral: + return NullLiteral(); + case EJsonPathItemType::StringLiteral: + return StringLiteral(item); + case 
EJsonPathItemType::FilterObject: + return FilterObject(item); + case EJsonPathItemType::FilterPredicate: + return FilterPredicate(item); + case EJsonPathItemType::BinaryLess: + case EJsonPathItemType::BinaryLessEqual: + case EJsonPathItemType::BinaryGreater: + case EJsonPathItemType::BinaryGreaterEqual: + case EJsonPathItemType::BinaryEqual: + case EJsonPathItemType::BinaryNotEqual: + return CompareOp(item); + case EJsonPathItemType::AbsMethod: + case EJsonPathItemType::FloorMethod: + case EJsonPathItemType::CeilingMethod: + return NumericMethod(item); + case EJsonPathItemType::DoubleMethod: + return DoubleMethod(item); + case EJsonPathItemType::TypeMethod: + return TypeMethod(item); + case EJsonPathItemType::SizeMethod: + return SizeMethod(item); + case EJsonPathItemType::KeyValueMethod: + return KeyValueMethod(item); + case EJsonPathItemType::StartsWithPredicate: + return StartsWithPredicate(item); + case EJsonPathItemType::IsUnknownPredicate: + return IsUnknownPredicate(item); + case EJsonPathItemType::ExistsPredicate: + return ExistsPredicate(item); + case EJsonPathItemType::LikeRegexPredicate: + return LikeRegexPredicate(item); + } +} + +TResult TExecutor::ContextObject() { + return Input; +} + +TResult TExecutor::Variable(const TJsonPathItem& item) { + const auto it = Variables.find(item.GetString()); + if (it == Variables.end()) { + return MakeError(item, TIssuesIds::JSONPATH_UNDEFINED_VARIABLE, TStringBuilder() << "Undefined variable '" << item.GetString() << "'"); + } + + return TJsonNodes({it->second}); +} + +TResult TExecutor::LastArrayIndex(const TJsonPathItem& item) { + if (ArraySubscriptSource.empty()) { + return MakeError(item, TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT, "'last' is only allowed inside array subscripts"); + } + + const auto& array = ArraySubscriptSource.top(); + const i64 arraySize = array.GetSize(); + + // NOTE: For empty arrays `last` equals `-1`. 
This is intended, PostgreSQL 12 has the same behaviour + return TJsonNodes({TValue(MakeDouble(static_cast<double>(arraySize - 1)))}); +} + +TResult TExecutor::NumberLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeDouble(item.GetNumber()))}); +} + +TResult TExecutor::MemberAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); + } else { + continue; + } + } + + if (const auto payload = node.Lookup(item.GetString())) { + result.push_back(*payload); + continue; + } + + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_MEMBER_NOT_FOUND, "Member not found"); + } + } + + return std::move(result); +} + +TResult TExecutor::WildcardMemberAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); + } else { + continue; + } + } + + TValue key; + TValue value; + auto it = node.GetObjectIterator(); + while (it.Next(key, value)) { + result.push_back(value); + } + } + + return std::move(result); +} + +TMaybe<TIssue> TExecutor::EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result) { + if (index.size() != 1) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Expected single number item for array index"); + } + + const auto& indexValue = index[0]; + if (!indexValue.IsNumber()) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Array index must be number"); + } + + result = 
static_cast<i64>(std::floor(indexValue.GetNumber())); + return Nothing(); +} + +TMaybe<TIssue> TExecutor::EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result) { + for (const auto& subscript : item.GetSubscripts()) { + const auto& fromItem = Reader.ReadFromSubscript(subscript); + const auto fromResult = Execute(fromItem); + if (fromResult.IsError()) { + return fromResult.GetError(); + } + + i64 fromIndex = 0; + TMaybe<TIssue> error = EnsureSingleSubscript(fromItem.Pos, fromResult.GetNodes(), fromIndex); + if (error) { + return error; + } + + if (!subscript.IsRange()) { + result.emplace_back(fromIndex, fromItem.Pos); + continue; + } + + const auto& toItem = Reader.ReadToSubscript(subscript); + const auto toResult = Execute(toItem); + if (toResult.IsError()) { + return toResult.GetError(); + } + + i64 toIndex = 0; + error = EnsureSingleSubscript(toItem.Pos, toResult.GetNodes(), toIndex); + if (error) { + return error; + } + + result.emplace_back(fromIndex, fromItem.Pos, toIndex, toItem.Pos); + } + return Nothing(); +} + +TResult TExecutor::ArrayAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { + if (!node.IsArray()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); + } + + ArraySubscriptSource.push(node); + Y_DEFER { + ArraySubscriptSource.pop(); + }; + + // Check for "hard" errors in array subscripts. 
These are forbidden even in lax mode + // NOTE: We intentionally execute subscripts expressions for each array in the input + // because they can contain `last` keyword which value is different for each array + TVector<TArraySubscript> subscripts; + TMaybe<TIssue> error = EnsureArraySubscripts(item, subscripts); + if (error) { + return std::move(*error); + } + + const ui64 arraySize = node.GetSize(); + for (const auto& idx : subscripts) { + // Check bounds for first subscript + if (idx.GetFrom() < 0 || idx.GetFrom() >= static_cast<i64>(arraySize)) { + if (IsStrict()) { + return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); + } else { + continue; + } + } + + // If there is no second subcripts, just return corresponding array element + if (!idx.IsRange()) { + result.push_back(node.GetElement(idx.GetFrom())); + continue; + } + + // Check bounds for second subscript + if (idx.GetTo() < 0 || idx.GetTo() >= static_cast<i64>(arraySize)) { + if (IsStrict()) { + return MakeError(idx.GetToPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); + } else { + continue; + } + } + + // In strict mode invalid ranges are forbidden + if (idx.GetFrom() > idx.GetTo() && IsStrict()) { + return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE, "Range lower bound is greater than upper bound"); + } + + for (i64 i = idx.GetFrom(); i <= idx.GetTo(); i++) { + result.push_back(node.GetElement(i)); + } + } + } + return std::move(result); +} + +TResult TExecutor::WildcardArrayAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { + if (!node.IsArray()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); + } + + auto it = node.GetArrayIterator(); + TValue value; + while 
(it.Next(value)) { + result.push_back(value); + } + } + return std::move(result); +} + +TResult TExecutor::UnaryArithmeticOp(const TJsonPathItem& item) { + const auto& operandItem = Reader.ReadInput(item); + const auto operandsResult = Execute(operandItem); + if (operandsResult.IsError()) { + return operandsResult; + } + + const auto& operands = operandsResult.GetNodes(); + TJsonNodes result; + result.reserve(operands.size()); + for (const auto& operand : operands) { + if (!operand.IsNumber()) { + return MakeError( + operandItem, TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE, + TStringBuilder() << "Unsupported type for unary operations" + ); + } + + if (item.Type == EJsonPathItemType::UnaryPlus) { + result.push_back(operand); + continue; + } + + const auto value = operand.GetNumber(); + result.push_back(TValue(MakeDouble(-value))); + } + + return std::move(result); +} + +TMaybe<TIssue> TExecutor::EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result) { + if (nodes.size() != 1) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for binary operation"); + } + + const auto& value = nodes[0]; + if (!value.IsNumber()) { + return MakeError( + pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE, + TStringBuilder() << "Unsupported type for binary operations" + ); + } + + result = value.GetNumber(); + return Nothing(); +} + +TResult TExecutor::BinaryArithmeticOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return leftResult; + } + + double left = 0; + TMaybe<TIssue> error = EnsureBinaryArithmeticOpArgument(leftItem.Pos, leftResult.GetNodes(), left); + if (error) { + return std::move(*error); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + 
return rightResult; + } + + double right = 0; + error = EnsureBinaryArithmeticOpArgument(rightItem.Pos, rightResult.GetNodes(), right); + if (error) { + return std::move(*error); + } + + double result = 0; + switch (item.Type) { + case EJsonPathItemType::BinaryAdd: + result = left + right; + break; + case EJsonPathItemType::BinarySubstract: + result = left - right; + break; + case EJsonPathItemType::BinaryMultiply: + result = left * right; + break; + case EJsonPathItemType::BinaryDivide: + if (IsZero(right)) { + return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); + } + result = left / right; + break; + case EJsonPathItemType::BinaryModulo: + if (IsZero(right)) { + return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); + } + result = std::fmod(left, right); + break; + default: + YQL_ENSURE(false, "Expected binary arithmetic operation"); + } + + if (Y_UNLIKELY(std::isinf(result))) { + return MakeError(item, TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY, "Binary operation result is infinity"); + } + + return TJsonNodes({TValue(MakeDouble(result))}); +} + +TMaybe<TIssue> TExecutor::EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result) { + if (nodes.size() != 1) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for logical operation"); + } + + const auto& value = nodes[0]; + if (value.IsNull()) { + result = Nothing(); + } else if (value.IsBool()) { + result = value.GetBool(); + } else { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Unsupported type for logical operation"); + } + + return Nothing(); +} + +TResult TExecutor::BinaryLogicalOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return leftResult; + } + + TMaybe<bool> left; + 
TMaybe<TIssue> error = EnsureLogicalOpArgument(leftItem.Pos, leftResult.GetNodes(), left); + if (error) { + return std::move(*error); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + return rightResult; + } + + TMaybe<bool> right; + error = EnsureLogicalOpArgument(rightItem.Pos, rightResult.GetNodes(), right); + if (error) { + return std::move(*error); + } + + switch (item.Type) { + case EJsonPathItemType::BinaryAnd: { + /* + AND truth table (taken from SQL JSON standard) + + | && | true | false | null | + | ----- | ----- | ----- | ----- | + | true | true | false | null | + | false | false | false | false | + | null | null | false | null | + */ + if (left.Defined() && right.Defined()) { + return TJsonNodes({TValue(MakeBool(*left && *right))}); + } + + const bool falseVsNull = !left.GetOrElse(true) && !right.Defined(); + const bool nullVsFalse = !right.GetOrElse(true) && !left.Defined(); + if (falseVsNull || nullVsFalse) { + return TJsonNodes({TValue(MakeBool(false))}); + } + return TJsonNodes({TValue(MakeEntity())}); + } + case EJsonPathItemType::BinaryOr: { + /* + OR truth table (taken from SQL JSON standard) + + | || | true | false | null | + | ----- | ----- | ----- | ----- | + | true | true | true | true | + | false | true | false | null | + | null | true | null | null | + */ + if (left.Defined() && right.Defined()) { + return TJsonNodes({TValue(MakeBool(*left || *right))}); + } + + const bool trueVsNull = left.GetOrElse(false) && !right.Defined(); + const bool nullVsTrue = right.GetOrElse(false) && !left.Defined(); + if (trueVsNull || nullVsTrue) { + return TJsonNodes({TValue(MakeBool(true))}); + } + return TJsonNodes({TValue(MakeEntity())}); + } + default: + YQL_ENSURE(false, "Expected binary logical operation"); + } +} + +TResult TExecutor::UnaryLogicalOp(const TJsonPathItem& item) { + /* + NOT truth table (taken from SQL JSON standard) + + | x | !x | + | ----- | 
----- | + | true | false | + | false | true | + | null | null | + */ + const auto& operandItem = Reader.ReadInput(item); + const auto operandResult = Execute(operandItem); + if (operandResult.IsError()) { + return operandResult; + } + + TMaybe<bool> operand; + TMaybe<TIssue> error = EnsureLogicalOpArgument(operandItem.Pos, operandResult.GetNodes(), operand); + if (error) { + return std::move(*error); + } + + if (!operand.Defined()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + return TJsonNodes({TValue(MakeBool(!(*operand)))}); +} + +TResult TExecutor::BooleanLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeBool(item.GetBoolean()))}); +} + +TResult TExecutor::NullLiteral() { + return TJsonNodes({TValue(MakeEntity())}); +} + +TResult TExecutor::StringLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeString(item.GetString(), ValueBuilder))}); +} + +TMaybe<bool> TExecutor::CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation) { + if (IsObjectOrArray(left) || IsObjectOrArray(right)) { + // Comparisons of objects and arrays are prohibited + return Nothing(); + } + + if (left.IsNull() && right.IsNull()) { + // null == null is true, but all other comparisons are false + return operation == EJsonPathItemType::BinaryEqual; + } + + if (left.IsNull() || right.IsNull()) { + // All operations between null and non-null are false + return false; + } + + auto doCompare = [&operation](const auto& left, const auto& right) { + switch (operation) { + case EJsonPathItemType::BinaryEqual: + return left == right; + case EJsonPathItemType::BinaryNotEqual: + return left != right; + case EJsonPathItemType::BinaryLess: + return left < right; + case EJsonPathItemType::BinaryLessEqual: + return left <= right; + case EJsonPathItemType::BinaryGreater: + return left > right; + case EJsonPathItemType::BinaryGreaterEqual: + return left >= right; + default: + YQL_ENSURE(false, "Expected compare operation"); + } + }; + + if 
(left.IsBool() && right.IsBool()) { + return doCompare(left.GetBool(), right.GetBool()); + } else if (left.IsString() && right.IsString()) { + // NOTE: Strings are compared as byte arrays. + // YQL does the same thing for UTF-8 strings and according to SQL/JSON + // standard JsonPath must use the same semantics. + // + // However this is not correct in logical meaning. Let us consider strings: + // - U+00e9 (LATIN SMALL LETTER E WITH ACUTE), 'é' + // - U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), `é` + // Even though these two strings are different byte sequences, they are identical + // from UTF-8 perspective. + return doCompare(left.GetString(), right.GetString()); + } + + if (!left.IsNumber() || !right.IsNumber()) { + return Nothing(); + } + + const auto leftNumber = left.GetNumber(); + const auto rightNumber = right.GetNumber(); + switch (operation) { + case EJsonPathItemType::BinaryEqual: + return IsEqual(leftNumber, rightNumber); + case EJsonPathItemType::BinaryNotEqual: + return !IsEqual(leftNumber, rightNumber); + case EJsonPathItemType::BinaryLess: + return IsLess(leftNumber, rightNumber); + case EJsonPathItemType::BinaryLessEqual: + return !IsGreater(leftNumber, rightNumber); + case EJsonPathItemType::BinaryGreater: + return IsGreater(leftNumber, rightNumber); + case EJsonPathItemType::BinaryGreaterEqual: + return !IsLess(leftNumber, rightNumber); + default: + YQL_ENSURE(false, "Expected compare operation"); + } +} + +TResult TExecutor::CompareOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto leftNodes = OptionalUnwrapArrays(leftResult.GetNodes()); + const auto rightNodes = 
OptionalUnwrapArrays(rightResult.GetNodes()); + bool error = false; + bool found = false; + for (const auto& left : leftNodes) { + for (const auto& right : rightNodes) { + const auto result = CompareValues(left, right, item.Type); + if (!result.Defined()) { + error = true; + } else { + found |= *result; + } + + if (IsLax() && (error || found)) { + break; + } + } + + if (IsLax() && (error || found)) { + break; + } + } + + if (error) { + return TJsonNodes({TValue(MakeEntity())}); + } + return TJsonNodes({TValue(MakeBool(found))}); +} + +TResult TExecutor::FilterObject(const TJsonPathItem& item) { + if (CurrentFilterObject.empty()) { + return MakeError(item, TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER, "'@' is only allowed inside filters"); + } + + return TJsonNodes({CurrentFilterObject.top()}); +} + +TResult TExecutor::FilterPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + const auto& predicateItem = Reader.ReadFilterPredicate(item); + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + CurrentFilterObject.push(node); + Y_DEFER { + CurrentFilterObject.pop(); + }; + + const auto predicateResult = Execute(predicateItem); + if (predicateResult.IsError()) { + continue; + } + + const auto& predicateNodes = predicateResult.GetNodes(); + if (predicateNodes.size() != 1) { + continue; + } + + const auto& value = predicateNodes[0]; + if (value.IsBool() && value.GetBool()) { + result.push_back(node); + continue; + } + } + return std::move(result); +} + +TResult TExecutor::NumericMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsNumber()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT, "Unsupported type for numeric method"); + 
} + + double applied = node.GetNumber(); + switch (item.Type) { + case EJsonPathItemType::AbsMethod: + applied = std::fabs(applied); + break; + case EJsonPathItemType::FloorMethod: + applied = std::floor(applied); + break; + case EJsonPathItemType::CeilingMethod: + applied = std::ceil(applied); + break; + default: + YQL_ENSURE(false, "Expected numeric method"); + } + result.push_back(TValue(MakeDouble(applied))); + } + return std::move(result); +} + +TResult TExecutor::DoubleMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsString()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, "Unsupported type for double() method"); + } + + const double parsed = ParseDouble(node.GetString()); + if (std::isnan(parsed)) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMBER_STRING, "Error parsing number from string"); + } + + if (std::isinf(parsed)) { + return MakeError(item, TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING, "Parsed number is infinity"); + } + + result.push_back(TValue(MakeDouble(parsed))); + } + return std::move(result); +} + +TResult TExecutor::TypeMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : input.GetNodes()) { + TStringBuf type; + switch (node.GetType()) { + case EValueType::Null: + type = "null"; + break; + case EValueType::Bool: + type = "boolean"; + break; + case EValueType::Number: + type = "number"; + break; + case EValueType::String: + type = "string"; + break; + case EValueType::Array: + type = "array"; + break; + case EValueType::Object: + type = "object"; + break; + } + result.push_back(TValue(MakeString(type, ValueBuilder))); + } + return std::move(result); +} + +TResult 
TExecutor::SizeMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : input.GetNodes()) { + ui64 size = 1; + if (node.IsArray()) { + size = node.GetSize(); + } + result.push_back(TValue(MakeDouble(static_cast<double>(size)))); + } + return std::move(result); +} + +TResult TExecutor::KeyValueMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + TPair row[2]; + TPair& nameEntry = row[0]; + TPair& valueEntry = row[1]; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT, "Unsupported type for keyvalue() method"); + } + + TValue key; + TValue value; + auto it = node.GetObjectIterator(); + while (it.Next(key, value)) { + nameEntry.first = MakeString("name", ValueBuilder); + nameEntry.second = key.ConvertToUnboxedValue(ValueBuilder); + + valueEntry.first = MakeString("value", ValueBuilder); + valueEntry.second = value.ConvertToUnboxedValue(ValueBuilder); + + result.push_back(TValue(MakeDict(row, 2))); + } + } + return std::move(result); +} + +TResult TExecutor::StartsWithPredicate(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& inputNodes = input.GetNodes(); + if (inputNodes.size() != 1) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Expected exactly 1 item as input argument for starts with predicate"); + } + + const auto& inputString = inputNodes[0]; + if (!inputString.IsString()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Type of input argument for starts with predicate must be string"); + } + + const auto prefix = Execute(Reader.ReadPrefix(item)); + if 
(prefix.IsError()) { + return prefix; + } + + bool error = false; + bool found = false; + for (const auto& node : prefix.GetNodes()) { + if (node.IsString()) { + found |= inputString.GetString().StartsWith(node.GetString()); + } else { + error = true; + } + + if (IsLax() && (found || error)) { + break; + } + } + + if (error) { + return TJsonNodes({TValue(MakeEntity())}); + } + return TJsonNodes({TValue(MakeBool(found))}); +} + +TResult TExecutor::IsUnknownPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& nodes = input.GetNodes(); + if (nodes.size() != 1) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "Expected exactly 1 item as an argument for is unknown predicate"); + } + + const auto& node = nodes[0]; + if (node.IsNull()) { + return TJsonNodes({TValue(MakeBool(true))}); + } + + if (!node.IsBool()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "is unknown predicate supports only bool and null types for its argument"); + } + return TJsonNodes({TValue(MakeBool(false))}); +} + +TResult TExecutor::ExistsPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto& nodes = input.GetNodes(); + return TJsonNodes({TValue(MakeBool(!nodes.empty()))}); +} + +TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& regex = item.GetRegex(); + bool error = false; + bool found = false; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (node.IsString()) { + found |= regex->Matches(node.GetString()); + } else { + error = true; + } + + if (IsLax() && (found || error)) { + break; + } + } + + if (error) { + return TJsonNodes({TValue(MakeEntity())}); + } + 
return TJsonNodes({TValue(MakeBool(found))}); +} + +TJsonNodes TExecutor::OptionalUnwrapArrays(const TJsonNodes& input) { + if (IsStrict()) { + return input; + } + + TJsonNodes result; + for (const auto& node : input) { + if (!node.IsArray()) { + result.push_back(node); + continue; + } + + auto it = node.GetArrayIterator(); + TValue value; + while (it.Next(value)) { + result.push_back(value); + } + } + return result; +} + +TJsonNodes TExecutor::OptionalArrayWrapNodes(const TJsonNodes& input) { + if (IsStrict()) { + return input; + } + + TJsonNodes result; + for (const auto& node : input) { + if (node.IsArray()) { + result.push_back(node); + continue; + } + + TUnboxedValue nodeCopy(node.ConvertToUnboxedValue(ValueBuilder)); + result.push_back(TValue(MakeList(&nodeCopy, 1, ValueBuilder))); + } + return result; +} + +} + diff --git a/yql/essentials/minikql/jsonpath/executor.h b/yql/essentials/minikql/jsonpath/executor.h new file mode 100644 index 0000000000..9b80a21133 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/executor.h @@ -0,0 +1,198 @@ +#pragma once + +#include "binary.h" +#include "value.h" + +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/utils/yql_panic.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_allocator.h> + +#include <library/cpp/json/json_value.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/generic/ptr.h> +#include <util/generic/stack.h> +#include <util/generic/hash.h> +#include <util/generic/maybe.h> + +#include <variant> + +namespace NYql::NJsonPath { + +using TJsonNodes = TSmallVec<TValue>; + +class TResult { +public: + TResult(TJsonNodes&& nodes); + + TResult(const TJsonNodes& nodes); + + TResult(TIssue&& issue); + + const TJsonNodes& GetNodes() const; + + TJsonNodes& GetNodes(); + + const TIssue& GetError() const; + + bool IsError() const; + +private: + 
std::variant<TJsonNodes, TIssue> Result;
+};
+
+// One array subscript: either a single index (`From` only) or an inclusive
+// `From to To` range, each bound carrying its source position.
+class TArraySubscript {
+public:
+    // Single-index subscript; IsRange() is false.
+    TArraySubscript(i64 from, TPosition fromPos)
+        : From(from)
+        , FromPos(fromPos)
+        , HasTo(false)
+    {
+    }
+
+    // Range subscript; IsRange() is true.
+    TArraySubscript(i64 from, TPosition fromPos, i64 to, TPosition toPos)
+        : From(from)
+        , FromPos(fromPos)
+        , To(to)
+        , ToPos(toPos)
+        , HasTo(true)
+    {
+    }
+
+    i64 GetFrom() const {
+        return From;
+    }
+
+    TPosition GetFromPos() const {
+        return FromPos;
+    }
+
+    // Only valid for range subscripts (enforced with YQL_ENSURE).
+    i64 GetTo() const {
+        YQL_ENSURE(IsRange());
+        return To;
+    }
+
+    // NOTE(review): unlike GetTo(), this does not check IsRange(); for a
+    // single-index subscript it returns a default-constructed TPosition.
+    // Confirm that this asymmetry is intended.
+    TPosition GetToPos() const {
+        return ToPos;
+    }
+
+    bool IsRange() const {
+        return HasTo;
+    }
+
+private:
+    i64 From = 0;
+    TPosition FromPos;
+    i64 To = 0;
+    TPosition ToPos;
+    // No default initializer: both constructors set it explicitly.
+    bool HasTo;
+};
+
+// JsonPath variables keyed by name.
+using TVariablesMap = THashMap<TString, TValue>;
+
+// Evaluates a compiled JsonPath against the given input nodes and variables.
+class TExecutor {
+public:
+    TExecutor(
+        const TJsonPathPtr path,
+        const TJsonNodes& input,
+        const TVariablesMap& variables,
+        const NUdf::IValueBuilder* valueBuilder);
+
+    TResult Execute();
+
+private:
+    // Tolerance for the floating point helpers below (presumably; their
+    // definitions are not in this header).
+    constexpr static double EPSILON = 1e-20;
+
+    static bool IsZero(double value);
+
+    static bool IsEqual(double a, double b);
+
+    static bool IsLess(double a, double b);
+
+    static bool IsGreater(double a, double b);
+
+    bool IsStrict() const;
+
+    bool IsLax() const;
+
+    TResult Execute(const TJsonPathItem& item);
+
+    TResult ContextObject();
+
+    TResult Variable(const TJsonPathItem& item);
+
+    TResult LastArrayIndex(const TJsonPathItem& item);
+
+    TResult NumberLiteral(const TJsonPathItem& item);
+
+    TResult MemberAccess(const TJsonPathItem& item);
+
+    TResult WildcardMemberAccess(const TJsonPathItem& item);
+
+    TMaybe<TIssue> EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result);
+
+    TMaybe<TIssue> EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result);
+
+    TResult ArrayAccess(const TJsonPathItem& item);
+
+    TResult WildcardArrayAccess(const TJsonPathItem& item);
+
+    TResult UnaryArithmeticOp(const TJsonPathItem& item);
+
+    TMaybe<TIssue>
EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result);
+
+    TResult BinaryArithmeticOp(const TJsonPathItem& item);
+
+    TMaybe<TIssue> EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result);
+
+    TResult BinaryLogicalOp(const TJsonPathItem& item);
+
+    TResult UnaryLogicalOp(const TJsonPathItem& item);
+
+    TResult BooleanLiteral(const TJsonPathItem& item);
+
+    TResult NullLiteral();
+
+    TResult StringLiteral(const TJsonPathItem& item);
+
+    TMaybe<bool> CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation);
+
+    TResult CompareOp(const TJsonPathItem& item);
+
+    TResult FilterObject(const TJsonPathItem& item);
+
+    TResult FilterPredicate(const TJsonPathItem& item);
+
+    TResult NumericMethod(const TJsonPathItem& item);
+
+    TResult DoubleMethod(const TJsonPathItem& item);
+
+    TResult TypeMethod(const TJsonPathItem& item);
+
+    TResult SizeMethod(const TJsonPathItem& item);
+
+    TResult KeyValueMethod(const TJsonPathItem& item);
+
+    TResult StartsWithPredicate(const TJsonPathItem& item);
+
+    TResult IsUnknownPredicate(const TJsonPathItem& item);
+
+    TResult ExistsPredicate(const TJsonPathItem& item);
+
+    TResult LikeRegexPredicate(const TJsonPathItem& item);
+
+    // Lax-mode helpers; both return the input unchanged in strict mode.
+    TJsonNodes OptionalUnwrapArrays(const TJsonNodes& input);
+
+    TJsonNodes OptionalArrayWrapNodes(const TJsonNodes& input);
+
+    TStack<TValue> ArraySubscriptSource;
+    TStack<TValue> CurrentFilterObject;
+    TJsonPathReader Reader;
+    TJsonNodes Input;
+    // Held by reference and by raw pointer: presumably bound to the constructor
+    // arguments, which then have to outlive the executor. Verify against the
+    // constructor definition.
+    const TVariablesMap& Variables;
+    const NUdf::IValueBuilder* ValueBuilder;
+};
+
+}
diff --git a/yql/essentials/minikql/jsonpath/jsonpath.cpp b/yql/essentials/minikql/jsonpath/jsonpath.cpp
new file mode 100644
index 0000000000..c48bb08cf1
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/jsonpath.cpp
@@ -0,0 +1,129 @@
+#include "jsonpath.h"
+
+#include "binary.h"
+#include "ast_builder.h"
+#include "executor.h"
+#include "type_check.h"
+#include "value.h"
+
+#include
<yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathLexer.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h>
+#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h>
+
+#include <google/protobuf/message.h>
+
+#include <util/string/strip.h>
+
+#if defined(_tsan_enabled_)
+#include <util/system/mutex.h>
+#endif
+
+using namespace NYql;
+using namespace NYql::NUdf;
+using namespace NJson;
+
+namespace {
+
+// Only present in thread-sanitizer builds: serialises the parser invocation
+// in ParseJsonPathAst below.
+#if defined(_tsan_enabled_)
+TMutex SanitizerJsonPathTranslationMutex;
+#endif
+
+// Forwards parser errors into a TIssues list, tagging each one as
+// JSONPATH_PARSE_ERROR with error severity.
+class TParseErrorsCollector : public NProtoAST::IErrorCollector {
+public:
+    TParseErrorsCollector(TIssues& issues, size_t maxErrors)
+        : IErrorCollector(maxErrors)
+        , Issues(issues)
+    {
+    }
+
+private:
+    void AddError(ui32 line, ui32 column, const TString& message) override {
+        // TPosition is built with the column first, then the line.
+        Issues.AddIssue(TPosition(column, line, "jsonpath"), StripString(message));
+        Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
+    }
+
+    TIssues& Issues;
+};
+
+}
+
+namespace NYql::NJsonPath {
+
+// Parses `path` into a JsonPath AST.
+// Problems are appended to `issues`; `maxParseErrors` is handed to the parser
+// error collector. Returns a null pointer when the text is not UTF-8 or when
+// parsing / AST building fails.
+const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors) {
+    if (!IsUtf(path)) {
+        issues.AddIssue(TPosition(1, 1, "jsonpath"), "JsonPath must be UTF-8 encoded string");
+        issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
+        return {};
+    }
+
+    // The raw proto AST lives in this arena, i.e. only until the function returns.
+    google::protobuf::Arena arena;
+    const google::protobuf::Message* rawAst = nullptr;
+    {
+        #if defined(_tsan_enabled_)
+        TGuard<TMutex> guard(SanitizerJsonPathTranslationMutex);
+        #endif
+        NProtoAST::TProtoASTBuilder3<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena);
+        TParseErrorsCollector collector(issues, maxParseErrors);
+        rawAst = builder.BuildAST(collector);
+    }
+
+    if (rawAst == nullptr) {
+        return nullptr;
+    }
+
+    const google::protobuf::Descriptor* descriptor =
rawAst->GetDescriptor(); + if (descriptor && descriptor->name() != "TJsonPathParserAST") { + return nullptr; + } + + const auto* protoAst = static_cast<const NJsonPathGenerated::TJsonPathParserAST*>(rawAst); + TAstBuilder astBuilder(issues); + TAstNodePtr ast = astBuilder.Build(*protoAst); + if (!issues.Empty()) { + return nullptr; + } + + // At this point AST is guaranteed to be valid. We return it even if + // type checker finds some logical errors. + TJsonPathTypeChecker checker(issues); + ast->Accept(checker); + return ast; +} + +const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast) { + TJsonPathBuilder builder; + ast->Accept(builder); + return builder.ShrinkAndGetResult(); +} + +const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { + const auto ast = ParseJsonPathAst(path, issues, maxParseErrors); + if (!issues.Empty()) { + return {}; + } + return PackBinaryJsonPath(ast); +} + +TResult ExecuteJsonPath( + const TJsonPathPtr jsonPath, + const TValue& json, + const TVariablesMap& variables, + const NUdf::IValueBuilder* valueBuilder) { + TExecutor executor(jsonPath, {json}, variables, valueBuilder); + return executor.Execute(); +} + +TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict) { + TVariablesMap variables; + TUnboxedValue key; + TUnboxedValue payload; + auto it = dict.GetDictIterator(); + while (it.NextPair(key, payload)) { + variables[key.AsStringRef()] = TValue(payload); + } + return variables; +} + +} diff --git a/yql/essentials/minikql/jsonpath/jsonpath.h b/yql/essentials/minikql/jsonpath/jsonpath.h new file mode 100644 index 0000000000..455739b005 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/jsonpath.h @@ -0,0 +1,24 @@ +#pragma once + +#include "executor.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +namespace NYql::NJsonPath { + +const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t 
maxParseErrors); + +const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast, TIssues& issues); + +const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors); + +TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict); + +TResult ExecuteJsonPath( + const TJsonPathPtr jsonPath, + const TValue& json, + const TVariablesMap& variables, + const NUdf::IValueBuilder* valueBuilder); + +} diff --git a/yql/essentials/minikql/jsonpath/parse_double.cpp b/yql/essentials/minikql/jsonpath/parse_double.cpp new file mode 100644 index 0000000000..f20476bfe9 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/parse_double.cpp @@ -0,0 +1,33 @@ + +#include "parse_double.h" + +#include <contrib/libs/double-conversion/double-conversion/double-conversion.h> + +#include <cmath> + +namespace NYql::NJsonPath { + +using double_conversion::StringToDoubleConverter; + +double ParseDouble(const TStringBuf literal) { + // FromString<double> from util/string/cast.h is permissive to junk in string. + // In our case junk in string means bug in grammar. + // See https://a.yandex-team.ru/arc/trunk/arcadia/util/string/cast.cpp?rev=6456750#L692 + struct TStringToNumberConverter: public StringToDoubleConverter { + inline TStringToNumberConverter() + : StringToDoubleConverter( + NO_FLAGS, + /* empty_string_value */ 0.0, + /* junk_string_value */ NAN, + /* infinity_symbol */ nullptr, + /* nan_symbol */ nullptr + ) + { + } + }; + + int parsedCharactersCount = 0; + return Singleton<TStringToNumberConverter>()->StringToDouble(literal.data(), literal.length(), &parsedCharactersCount); +} + +} diff --git a/yql/essentials/minikql/jsonpath/parse_double.h b/yql/essentials/minikql/jsonpath/parse_double.h new file mode 100644 index 0000000000..8481bf7e82 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/parse_double.h @@ -0,0 +1,10 @@ +#include <util/generic/string.h> + +namespace NYql::NJsonPath { + +// Parses double literal. 
Respects exponential format like `-23.5e-10`.
+// On parsing error returns NaN double value (can be checked using `std::isnan`).
+// On double overflow returns INF double value (can be checked using `std::isinf`).
+// An empty literal is parsed as 0.0.
+double ParseDouble(const TStringBuf literal);
+
+}
\ No newline at end of file diff --git a/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp b/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp new file mode 100644 index 0000000000..da670f4485 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp @@ -0,0 +1,67 @@ +#include "registrator.h" +#include "re.h" + +#include <util/generic/fwd.h> +#include <util/generic/vector.h> +#include <util/generic/singleton.h> +#include <util/generic/yexception.h> + +#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h> + +namespace NReWrapper { + +namespace NRegistrator { + +struct TLib { + ui64 Id; + TCompiler Compiler; + TDeserializer Deserializer; +}; + +using TModules = TVector<TLib>; + +TModules* GetModules() { + return Singleton<TModules>(); +} + +void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer) { + Y_ABORT_UNLESS(id > 0); + if (GetModules()->size() < id) { + GetModules()->resize(id); + } + GetModules()->at(id - 1) = TLib{id, compiler, deserializer}; +} + +} + +namespace NDispatcher { + +void ThrowOnOutOfRange(ui32 id) { + if (NRegistrator::GetModules()->size() < id || id == 0) { + ythrow yexception() + << "Libs with id: " << id + << " was not found. 
Total added libs: " << NRegistrator::GetModules()->size(); + } +} + +IRePtr Deserialize(const TStringBuf& serializedRegex) { + TSerialization proto; + TString str(serializedRegex); + auto res = proto.ParseFromString(str); + if (!res) { + proto.SetHyperscan(str); + } + + ui64 id = (ui64)proto.GetDataCase();; + ThrowOnOutOfRange(id); + return NRegistrator::GetModules()->at(id - 1).Deserializer(proto); +} + +IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id) { + ThrowOnOutOfRange(id); + return NRegistrator::GetModules()->at(id - 1).Compiler(regex, flags); +} + +} + +} diff --git a/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp new file mode 100644 index 0000000000..2fc490b6f4 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp @@ -0,0 +1,69 @@ +#include <yql/essentials/minikql/jsonpath/rewrapper/re.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h> +#include <library/cpp/regex/hyperscan/hyperscan.h> +#include <util/charset/utf8.h> + +namespace NReWrapper { +namespace NHyperscan { + +namespace { + +class THyperscan : public IRe { +public: + THyperscan(::NHyperscan::TDatabase&& db) + : Database(std::move(db)) + { } + + bool Matches(const TStringBuf& text) const override { + if (!Scratch) { + Scratch = ::NHyperscan::MakeScratch(Database); + } + return ::NHyperscan::Matches(Database, Scratch, text); + } + + TString Serialize() const override { + // Compatibility with old versions + return ::NHyperscan::Serialize(Database); +/* + * TSerialization proto; + * proto.SetHyperscan(::NHyperscan::Serialize(Database)); + * TString data; + * auto res = proto.SerializeToString(&data); + * Y_ABORT_UNLESS(res); + * return data; + */ + } +private: + ::NHyperscan::TDatabase Database; + mutable ::NHyperscan::TScratch Scratch; +}; + +} + +IRePtr 
Compile(const TStringBuf& regex, unsigned int flags) {
+    unsigned int hyperscanFlags = 0;
+    try {
+        // Any non-zero UTF8Detect result enables UTF-8 mode here, whereas the re2
+        // backend compares the result with UTF8 explicitly.
+        if (UTF8Detect(regex)) {
+            hyperscanFlags |= HS_FLAG_UTF8;
+        }
+        // NOTE(review): HS_CPU_FEATURES_AVX2 is a CPU-feature constant, while the
+        // other bits OR-ed into this mask are HS_FLAG_* pattern flags. Confirm it
+        // is meant to be passed as a compile flag and does not alias one of them.
+        if (NX86::HaveAVX2()) {
+            hyperscanFlags |= HS_CPU_FEATURES_AVX2;
+        }
+        if (flags & FLAGS_CASELESS) {
+            hyperscanFlags |= HS_FLAG_CASELESS;
+        }
+        return std::make_unique<THyperscan>(::NHyperscan::Compile(regex, hyperscanFlags));
+    } catch (const ::NHyperscan::TCompileException& ex) {
+        // Re-thrown as the wrapper's own exception type; only the message is kept.
+        ythrow TCompileException() << ex.what();
+    }
+}
+
+// Restores a regex from the `Hyperscan` payload of the serialization message.
+IRePtr Deserialize(const TSerialization& proto) {
+    return std::make_unique<THyperscan>(::NHyperscan::Deserialize(proto.GetHyperscan()));
+}
+
+// Registers this backend under the oneof case number of the `Hyperscan` field.
+REGISTER_RE_LIB(TSerialization::kHyperscan, Compile, Deserialize)
+
+}
+}
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make
new file mode 100644
index 0000000000..4cc999c064
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+    library/cpp/regex/hyperscan
+    yql/essentials/minikql/jsonpath/rewrapper
+)
+
+SRCS(
+    GLOBAL hyperscan.cpp
+)
+
+END()
+
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto b/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto
new file mode 100644
index 0000000000..922ec74b26
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto
@@ -0,0 +1,15 @@
+syntax = "proto3";
+
+package NReWrapper;
+
+// Source pattern and wrapper flags from which an RE2 regex is rebuilt.
+message TRe2Serialization {
+    string Regexp = 1;
+    uint64 Flags = 2;
+};
+
+// The field numbers of this oneof are also used as backend ids by the
+// dispatcher, so they must stay in sync with the REGISTER_RE_LIB calls.
+message TSerialization {
+    oneof Data {
+        bytes Hyperscan = 1;
+        TRe2Serialization Re2 = 2;
+    }
+};
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make
new file mode 100644
index 0000000000..ca4cf4aae1
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make
@@ -0,0 +1,9 @@
+PROTO_LIBRARY()
+
+SRCS(
+    serialization.proto
+)
+
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re.h b/yql/essentials/minikql/jsonpath/rewrapper/re.h
new file mode 100644
index 0000000000..3f564ad1ad
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <memory>
+
+#include <util/generic/fwd.h>
+#include <util/generic/yexception.h>
+
+namespace NReWrapper {
+
+// Thrown by the backends when a pattern fails to compile.
+class TCompileException : public yexception {
+};
+
+// Backend-independent compile flags (a bit mask).
+enum EFlags {
+    FLAGS_CASELESS = 1,
+};
+
+// Backend-independent compiled regular expression.
+class IRe {
+public:
+    virtual ~IRe() = default;
+    // Tells whether `text` matches the expression.
+    virtual bool Matches(const TStringBuf& text) const = 0;
+    // Produces bytes that NDispatcher::Deserialize can turn back into a regex.
+    virtual TString Serialize() const = 0;
+};
+
+using IRePtr = std::unique_ptr<IRe>;
+
+namespace NDispatcher {
+    // `id` selects the backend; both functions throw when no backend is
+    // registered for the requested id.
+    IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id);
+    IRePtr Deserialize(const TStringBuf& serializedRegex);
+}
+
+}
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp b/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp
new file mode 100644
index 0000000000..694472f632
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp
@@ -0,0 +1,89 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <contrib/libs/re2/re2/re2.h>
+#include <util/charset/utf8.h>
+
+namespace NReWrapper {
+
+using namespace re2;
+
+namespace NRe2 {
+
+namespace {
+
+// Builds RE2 options for `regex`: UTF-8 encoding only when UTF8Detect reports
+// UTF8 (Latin-1 otherwise), case sensitivity driven by FLAGS_CASELESS.
+RE2::Options CreateOptions(const TStringBuf& regex, unsigned int flags) {
+    RE2::Options options;
+    bool needUtf8 = (UTF8Detect(regex) == UTF8);
+    options.set_encoding(
+        needUtf8
+            ?
RE2::Options::Encoding::EncodingUTF8
+            : RE2::Options::Encoding::EncodingLatin1
+    );
+    options.set_case_sensitive(!(flags & FLAGS_CASELESS));
+    return options;
+}
+
+// IRe implementation backed by RE2. Keeps the original pattern and flags in
+// RawRegexp, because serialization stores those rather than a compiled form.
+class TRe2 : public IRe {
+public:
+    // Compiles `regex`; compilation failures are reported through Ok().
+    TRe2(const TStringBuf& regex, unsigned int flags)
+        : Regexp(StringPiece(regex.data(), regex.size()), CreateOptions(regex, flags))
+    {
+        auto re2 = RawRegexp.MutableRe2();
+        re2->set_regexp(TString(regex));
+        re2->set_flags(flags);
+    }
+
+    // Recompiles the pattern stored in the `Re2` payload of `proto`.
+    TRe2(const TSerialization& proto)
+        : Regexp(StringPiece(proto.GetRe2().GetRegexp().data(), proto.GetRe2().GetRegexp().size()),
+                 CreateOptions(proto.GetRe2().GetRegexp(), proto.GetRe2().GetFlags()))
+        , RawRegexp(proto)
+    { }
+
+    // Unanchored search over the whole `text`, without capture groups.
+    bool Matches(const TStringBuf& text) const override {
+        const StringPiece piece(text.data(), text.size());
+        RE2::Anchor anchor = RE2::UNANCHORED;
+
+        return Regexp.Match(piece, 0, text.size(), anchor, nullptr, 0);
+    }
+
+    TString Serialize() const override {
+        TString data;
+        auto res = RawRegexp.SerializeToString(&data);
+        Y_ABORT_UNLESS(res);
+        return data;
+    }
+
+    // Returns true when the pattern compiled; otherwise stores the RE2 error
+    // text into `*error` and returns false.
+    bool Ok(TString* error) const {
+        if (Regexp.ok()) {
+            return true;
+        } else {
+            *error = Regexp.error();
+            return false;
+        }
+    }
+private:
+    RE2 Regexp;
+    TSerialization RawRegexp;
+};
+
+}
+
+// Compiles `regex` with RE2; throws TCompileException carrying the RE2 error text.
+IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
+    auto ptr = std::make_unique<TRe2>(regex, flags);
+    TString error;
+    if (!ptr->Ok(&error)) {
+        ythrow TCompileException() << error;
+    }
+    return ptr;
+}
+
+// NOTE(review): unlike Compile, this does not call Ok(), so a stored pattern
+// that no longer compiles is returned without any error. Confirm this is intended.
+IRePtr Deserialize(const TSerialization& p) {
+    return std::make_unique<TRe2>(p);
+}
+
+// Registers this backend under the oneof case number of the `Re2` field.
+REGISTER_RE_LIB(TSerialization::kRe2, Compile, Deserialize)
+
+}
+
+}
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make
new file mode 100644
index 0000000000..5520d19414
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+    contrib/libs/re2
+    yql/essentials/minikql/jsonpath/rewrapper
+)
+
+SRCS(
+    GLOBAL re2.cpp
+)
+
+END() + diff --git a/yql/essentials/minikql/jsonpath/rewrapper/registrator.h b/yql/essentials/minikql/jsonpath/rewrapper/registrator.h new file mode 100644 index 0000000000..724b529910 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/rewrapper/registrator.h @@ -0,0 +1,28 @@ +#pragma once + +#include <util/generic/fwd.h> + +#define REGISTER_RE_LIB(...) \ + namespace { \ + struct TReWrapperStaticRegistrator { \ + inline TReWrapperStaticRegistrator() { \ + NRegistrator::AddLibrary(__VA_ARGS__); \ + } \ + } RE_REGISTRATOR; \ + } + +namespace NReWrapper { + +class IRe; +class TSerialization; +using IRePtr = std::unique_ptr<IRe>; + +namespace NRegistrator { + +using TCompiler = IRePtr(*)(const TStringBuf&, unsigned int); +using TDeserializer = IRePtr(*)(const TSerialization&); + +void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer); + +} +} diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp b/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp new file mode 100644 index 0000000000..3df53e44b0 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp @@ -0,0 +1,37 @@ +#include <yql/essentials/minikql/jsonpath/rewrapper/re.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/regex/hyperscan/hyperscan.h> + +namespace NReWrapper { +namespace NDispatcher { + +Y_UNIT_TEST_SUITE(ReWrapperDispatcherTestHyperscan) { + Y_UNIT_TEST(LegacySerialization) { + unsigned int hyperscanFlags = 0; + hyperscanFlags |= HS_FLAG_UTF8; + if (NX86::HaveAVX2()) { + hyperscanFlags |= HS_CPU_FEATURES_AVX2; + } + auto database = ::NHyperscan::Compile("[0-9]+", hyperscanFlags); + auto string = ::NHyperscan::Serialize(database); + + auto wrapper = Deserialize(string); + UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("123"), true); + UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("abc"), false); + } + Y_UNIT_TEST(Serialization) 
{
+        // Round trip through the dispatcher: the restored regex must behave
+        // like the freshly compiled one.
+        auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kHyperscan);
+        auto string = w1->Serialize();
+
+        auto w2 = Deserialize(string);
+        UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+        UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+        UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+        UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+    }
+}
+
+}
+}
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp b/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp
new file mode 100644
index 0000000000..0e4d9e2889
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp
@@ -0,0 +1,23 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NReWrapper {
+namespace NDispatcher {
+
+Y_UNIT_TEST_SUITE(ReWrapperDispatcherRe2) {
+    // Same round trip as the Hyperscan suite, using the RE2 backend id.
+    Y_UNIT_TEST(Serialization) {
+        auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kRe2);
+        auto string = w1->Serialize();
+
+        auto w2 = Deserialize(string);
+        UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+        UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+        UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+        UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+    }
+}
+
+}
+}
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make
new file mode 100644
index 0000000000..26f57235a8
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make
@@ -0,0 +1,25 @@
+UNITTEST_FOR(yql/essentials/minikql/jsonpath/rewrapper)
+
+# The Hyperscan backend and its test are built for x86_64 only;
+# other architectures get the RE2 test alone.
+IF(ARCH_X86_64)
+    SRCS(
+        hyperscan_ut.cpp
+        re2_ut.cpp
+    )
+
+    PEERDIR(
+        yql/essentials/minikql/jsonpath/rewrapper
+        yql/essentials/minikql/jsonpath/rewrapper/hyperscan
+        yql/essentials/minikql/jsonpath/rewrapper/re2
+    )
+ELSE()
+    SRCS(
+        re2_ut.cpp
+    )
+
+    PEERDIR(
+        yql/essentials/minikql/jsonpath/rewrapper
+
yql/essentials/minikql/jsonpath/rewrapper/re2
+    )
+ENDIF()
+
+END()
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/ya.make
new file mode 100644
index 0000000000..92e7b8e9b9
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+PEERDIR(
+    yql/essentials/minikql/jsonpath/rewrapper/proto
+)
+
+SRCS(
+    dispatcher.cpp
+)
+
+END()
+
+RECURSE(
+    hyperscan
+    proto
+    re2
+)
+
+RECURSE_FOR_TESTS(
+    ut
+)
diff --git a/yql/essentials/minikql/jsonpath/type_check.cpp b/yql/essentials/minikql/jsonpath/type_check.cpp
new file mode 100644
index 0000000000..f6ef00c9b2
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/type_check.cpp
@@ -0,0 +1,132 @@
+#include "type_check.h"
+
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+
+namespace NYql::NJsonPath {
+
+// The checker only appends to `issues`; it keeps a reference to the list.
+TJsonPathTypeChecker::TJsonPathTypeChecker(TIssues& issues)
+    : Issues(issues)
+{
+}
+
+void TJsonPathTypeChecker::VisitRoot(const TRootNode& node) {
+    node.GetExpr()->Accept(*this);
+}
+
+// Leaf nodes: nothing to check and nothing to descend into.
+void TJsonPathTypeChecker::VisitContextObject(const TContextObjectNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitVariable(const TVariableNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitLastArrayIndex(const TLastArrayIndexNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitNumberLiteral(const TNumberLiteralNode& node) {
+    Y_UNUSED(node);
+}
+
+// Accessors carry no type constraints of their own; only their input is checked.
+void TJsonPathTypeChecker::VisitMemberAccess(const TMemberAccessNode& node) {
+    node.GetInput()->Accept(*this);
+}
+
+void TJsonPathTypeChecker::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) {
+    node.GetInput()->Accept(*this);
+}
+
+// Checks the input and every subscript expression; `To` is visited only when set.
+void TJsonPathTypeChecker::VisitArrayAccess(const TArrayAccessNode& node) {
+    node.GetInput()->Accept(*this);
+
+    for (const auto& subscript : node.GetSubscripts()) {
+        subscript.From->Accept(*this);
+        if (subscript.To) {
+            subscript.To->Accept(*this);
+        }
+    }
+}
+
+void
TJsonPathTypeChecker::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) {
+    node.GetInput()->Accept(*this);
+}
+
+// Logical `not` requires an operand whose return type is Bool; other unary
+// operations are not constrained here. The operand is checked in either case.
+void TJsonPathTypeChecker::VisitUnaryOperation(const TUnaryOperationNode& node) {
+    if (node.GetOp() == EUnaryOperation::Not && node.GetExpr()->GetReturnType() != EReturnType::Bool) {
+        Error(node.GetExpr(), "Logical not needs boolean argument");
+    }
+
+    node.GetExpr()->Accept(*this);
+}
+
+// `And` / `Or` require both operands to return Bool; each offending side gets
+// its own issue. Other binary operations are not constrained here.
+void TJsonPathTypeChecker::VisitBinaryOperation(const TBinaryOperationNode& node) {
+    if (node.GetOp() == EBinaryOperation::And || node.GetOp() == EBinaryOperation::Or) {
+        if (node.GetLeftExpr()->GetReturnType() != EReturnType::Bool) {
+            Error(node.GetLeftExpr(), "Left argument of logical operation needs to be boolean");
+        }
+        if (node.GetRightExpr()->GetReturnType() != EReturnType::Bool) {
+            Error(node.GetRightExpr(), "Right argument of logical operation needs to be boolean");
+        }
+    }
+
+    node.GetLeftExpr()->Accept(*this);
+    node.GetRightExpr()->Accept(*this);
+}
+
+// Leaf nodes: nothing to check and nothing to descend into.
+void TJsonPathTypeChecker::VisitBooleanLiteral(const TBooleanLiteralNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitNullLiteral(const TNullLiteralNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitStringLiteral(const TStringLiteralNode& node) {
+    Y_UNUSED(node);
+}
+
+void TJsonPathTypeChecker::VisitFilterObject(const TFilterObjectNode& node) {
+    Y_UNUSED(node);
+}
+
+// The filter predicate must return Bool; input and predicate are both checked.
+void TJsonPathTypeChecker::VisitFilterPredicate(const TFilterPredicateNode& node) {
+    node.GetInput()->Accept(*this);
+
+    if (node.GetPredicate()->GetReturnType() != EReturnType::Bool) {
+        Error(node.GetPredicate(), "Filter must return boolean value");
+    }
+
+    node.GetPredicate()->Accept(*this);
+}
+
+void TJsonPathTypeChecker::VisitMethodCall(const TMethodCallNode& node) {
+    node.GetInput()->Accept(*this);
+}
+
+void TJsonPathTypeChecker::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) {
+    node.GetInput()->Accept(*this);
+    node.GetPrefix()->Accept(*this);
+}
+
+void
TJsonPathTypeChecker::VisitExistsPredicate(const TExistsPredicateNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { + if (node.GetInput()->GetReturnType() != EReturnType::Bool) { + Error(node.GetInput(), "is unknown predicate expectes boolean argument"); + } + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::Error(const TAstNodePtr node, const TStringBuf message) { + Issues.AddIssue(node->GetPos(), message); + Issues.back().SetCode(TIssuesIds::JSONPATH_TYPE_CHECK_ERROR, TSeverityIds::S_ERROR); +} + +} diff --git a/yql/essentials/minikql/jsonpath/type_check.h b/yql/essentials/minikql/jsonpath/type_check.h new file mode 100644 index 0000000000..0a02828a6e --- /dev/null +++ b/yql/essentials/minikql/jsonpath/type_check.h @@ -0,0 +1,59 @@ +#pragma once + +#include "ast_nodes.h" + +namespace NYql::NJsonPath { + +class TJsonPathTypeChecker : public IAstNodeVisitor { +public: + TJsonPathTypeChecker(TIssues& Issues); + + void VisitRoot(const TRootNode& node) override; + + void VisitContextObject(const TContextObjectNode& node) override; + + void VisitVariable(const TVariableNode& node) override; + + void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; + + void VisitNumberLiteral(const TNumberLiteralNode& node) override; + + void VisitMemberAccess(const TMemberAccessNode& node) override; + + void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; + + void VisitArrayAccess(const TArrayAccessNode& node) override; + + void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; + + void VisitUnaryOperation(const TUnaryOperationNode& node) override; + + void VisitBinaryOperation(const TBinaryOperationNode& node) override; + + void VisitBooleanLiteral(const TBooleanLiteralNode& node) 
override;
+
+    void VisitNullLiteral(const TNullLiteralNode& node) override;
+
+    void VisitStringLiteral(const TStringLiteralNode& node) override;
+
+    void VisitFilterObject(const TFilterObjectNode& node) override;
+
+    void VisitFilterPredicate(const TFilterPredicateNode& node) override;
+
+    void VisitMethodCall(const TMethodCallNode& node) override;
+
+    void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override;
+
+    void VisitExistsPredicate(const TExistsPredicateNode& node) override;
+
+    void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override;
+
+    void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override;
+
+    // Appends a JSONPATH_TYPE_CHECK_ERROR issue positioned at `node`.
+    void Error(const TAstNodePtr node, const TStringBuf message);
+
+private:
+    // Issue list supplied by the caller; the checker only appends to it.
+    TIssues& Issues;
+};
+
+}
\ No newline at end of file diff --git a/yql/essentials/minikql/jsonpath/ut/common_ut.cpp b/yql/essentials/minikql/jsonpath/ut/common_ut.cpp new file mode 100644 index 0000000000..a32389a768 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/common_ut.cpp @@ -0,0 +1,972 @@ +#include "test_base.h" + +#include <util/string/builder.h> + +#include <cmath> + +class TJsonPathCommonTest : public TJsonPathTestBase { +public: + TJsonPathCommonTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathCommonTest); + UNIT_TEST(TestPrimary); + UNIT_TEST(TestMemberAccess); + UNIT_TEST(TestWildcardMemberAccess); + UNIT_TEST(TestArrayAccess); + UNIT_TEST(TestLastArrayIndex); + UNIT_TEST(TestLastArrayIndexInvalid); + UNIT_TEST(TestNonIntegerArrayIndex); + UNIT_TEST(TestWildcardArrayAccess); + UNIT_TEST(TestUnaryOperations); + UNIT_TEST(TestUnaryOperationsErrors); + UNIT_TEST(TestBinaryArithmeticOperations); + UNIT_TEST(TestBinaryArithmeticOperationsErrors); + UNIT_TEST(TestParseErrors); + UNIT_TEST(TestVariables); + UNIT_TEST(TestDivisionByZero); + UNIT_TEST(TestInfinityResult); + UNIT_TEST(TestLogicalOperations); + UNIT_TEST(TestCompareOperations); + UNIT_TEST(TestFilter); + UNIT_TEST(TestFilterInvalid); + UNIT_TEST(TestNumericMethods); + UNIT_TEST(TestNumericMethodsErrors); + UNIT_TEST(TestDoubleMethod); + UNIT_TEST(TestDoubleMethodErrors); + UNIT_TEST(TestTypeMethod); + UNIT_TEST(TestSizeMethod); + UNIT_TEST(TestKeyValueMethod); + UNIT_TEST(TestKeyValueMethodErrors); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST(TestStartsWithPredicateErrors); + UNIT_TEST(TestExistsPredicate); + UNIT_TEST(TestIsUnknownPredicate); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST_SUITE_END(); + + void TestPrimary() { + const TVector<TMultiOutputTestCase> testCases = { + // Context object $ must return whole JSON when used alone + {R"({"key": 123})", "$", {R"({"key":123})"}}, + {R"([1, 2, 3])", "$", {R"([1,2,3])"}}, + {"1.234", "$", {"1.234"}}, + {R"("some string")", "$", 
{R"("some string")"}}, + + // Literal must not depend on input + {R"({"key": 123})", "123", {"123"}}, + {R"([1, 2, 3])", "123", {"123"}}, + {"1.234", "123", {"123"}}, + {R"("some string")", "123", {"123"}}, + + // Check various ways to define number literal + {"1", "123.4", {"123.4"}}, + {"1", "0.567", {"0.567"}}, + + {"1", "1234e-1", {"123.4"}}, + {"1", "567e-3", {"0.567"}}, + {"1", "123.4e-1", {"12.34"}}, + + {"1", "123e3", {"123000"}}, + {"1", "123e+3", {"123000"}}, + {"1", "1.23e+1", {"12.3"}}, + {"1", "1.23e1", {"12.3"}}, + + {"1", "12e0", {"12"}}, + {"1", "12.3e0", {"12.3"}}, + + {"1", "0", {"0"}}, + {"1", "0.0", {"0"}}, + {"1", "0.0e0", {"0"}}, + + // Check boolean and null literals + {"1", "null", {"null"}}, + {"1", "false", {"false"}}, + {"1", "true", {"true"}}, + + // Check string literals + {"1", "\"string\"", {"\"string\""}}, + {"1", "\" space another space \"", {"\" space another space \""}}, + {"1", "\"привет\"", {"\"привет\""}}, + // NOTE: escaping is added by library/cpp/json + {"1", "\"\r\n\t\"", {"\"\\r\\n\\t\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestMemberAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({"key": 123, "another_key": 456})", "$.key", {"123"}}, + {R"({"key": 123, "_another_28_key_$_": 456})", "$._another_28_key_$_", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.another_key ", {"456"}}, + + {R"({"key": 123, "another_key": 456})", "$.key", {"123"}}, + {R"({"k\"ey": 123, "another_key": 456})", "$.\"k\\\"ey\"", {"123"}}, + {R"({"k\"ey": 123, "another_key": 456})", "$.'k\\\"ey'", {"123"}}, + + {R"({"key": 123, "another_key": 456})", "$.'key'", {"123"}}, + {R"({"key": 123, "_another_28_key_$_": 456})", "$.'_another_28_key_$_'", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.'another_key' ", {"456"}}, + + {R"({"key": 123, "another_key": 456})", 
"$.\"key\"", {"123"}}, + {R"({"key": 123, "_another_28_key_$_": 456})", "$.\"_another_28_key_$_\"", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.\"another_key\" ", {"456"}}, + + {R"({"key": 123, "another key": 456})", "$.'another key'", {"456"}}, + {R"({"key": 123, "another key": 456})", "$.\"another key\"", {"456"}}, + + {R"({"key": 123, "прием отбой": 456})", "$.'прием отбой'", {"456"}}, + {R"({"key": 123, "прием отбой": 456})", "$.\"прием отбой\"", {"456"}}, + + {R"({"key": {"another": 456}})", "$.key.another", {"456"}}, + {R"({"key": {"another key": 456}})", "$.'key'.\"another key\"", {"456"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestWildcardMemberAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "first": 12, + "second": 72 + })", "$.*", {"12", "72"}}, + {R"({ + "friends": { + "Nik": {"age": 18}, + "Kate": {"age": 72} + } + })", "$.friends.*.age", {"72", "18"}}, + {R"({ + "friends": { + "Nik": {"age": 18}, + "Kate": {"age": 72} + } + })", "$.*.*.*", {"72", "18"}}, + {R"({})", "$.*.key", {}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestArrayAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[0]", {"1"}}, + {R"([1, 2, 3, 4, 5, 6])", "$[0 to 2]", {"1", "2", "3"}}, + {R"([1, 2, 3, 4, 5, 6])", "$[5, 0 to 2, 0, 0, 3 to 5, 2]", {"6", "1", "2", "3", "1", "1", "4", "5", "6", "3"}}, + {R"({ + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[1 to 3, 0].age", {"72", "50", "60", "18"}}, + {R"({ + "range": { + "from": 1, + "to": 2 + }, + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": 
"Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[$.range.from to $.range.to].age", {"72", "50"}}, + {R"({ + "range": { + "from": [1, 3, 4], + "to": {"key1": 1, "key2": 2, "key3": 3} + }, + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[$.range.from[1] to $.range.to.key3].age", {"60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLastArrayIndex() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[last]", {"3"}}, + {R"([1, 2, 3])", "$[1 to last]", {"2", "3"}}, + {R"([1, 2, 3])", "$[last to last]", {"3"}}, + {R"([1, 2, 3, 5, 6])", "$[1, last, last, 0, 2 to last, 3]", {"2", "6", "6", "1", "3", "5", "6", "5"}}, + {R"([ + [1, 2, 3, 4], + [5, 6, 7, 8] + ])", "$[*][last]", {"4", "8"}}, + {R"({ + "ranges": [ + {"from": 1, "to": 3}, + {"from": 0, "to": 1} + ], + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, + {R"({ + "ranges": [ + {"from": 1.23, "to": 3.75}, + {"from": 0.58, "to": 1.00001} + ], + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLastArrayIndexInvalid() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "last", 
C(TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestNonIntegerArrayIndex() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({ + "range": { + "from": [1, 3, 4], + "to": {"key1": 1, "key2": 2, "key3": 3} + }, + "friends": [1, 2, 3] + })", "$.friends[$.range.from[*] to $.range.to.*]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestWildcardArrayAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[*]", {"1", "2", "3"}}, + {R"([[1], [2], [3, 4, 5]])", "$[*][*]", {"1", "2", "3", "4", "5"}}, + {R"({ + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[*].age", {"18", "72", "50", "60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestUnaryOperations() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([])", "-3", {"-3"}}, + {R"([])", "+3", {"3"}}, + {R"(-1)", "-$", {"1"}}, + {R"(-1)", "+$", {"-1"}}, + {R"({ + "range": { + "from": -1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[+$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": -1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to +$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", 
"$.array[+$.range.from to +$.range.to]", {"2", "3"}}, + {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}}, + {"30000000000000000000000000", "-$", {"-3e+25"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestUnaryOperationsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "-$", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + {R"([1, 2, [], 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + {R"([1, 2, {}, 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestBinaryArithmeticOperations() { + const TVector<TMultiOutputTestCase> testCases = { + {"[]", "1 + 2", {"3"}}, + {"[]", "1 - 2", {"-1"}}, + {"[]", "10 * 5", {"50"}}, + {"[]", "10 / 5", {"2"}}, + {"[]", "13 % 5", {"3"}}, + + {"[]", "20 * 2 + 5", {"45"}}, + {"[]", "20 / 2 + 5", {"15"}}, + {"[]", "20 % 2 + 5", {"5"}}, + + {"[]", "20 * (2 + 5)", {"140"}}, + {"[]", "20 / (2 + 3)", {"4"}}, + {"[]", "20 % (2 + 5)", {"6"}}, + + {"[]", "5 / 2", {"2.5"}}, + {"[5.24 , 2.62]", "$[0] / $[1]", {"2"}}, + {"[5.24, 2.62]", "$[0] % $[1]", {"0"}}, + {"[3.753, 2.35]", "$[0] % $[1]", {"1.403"}}, + + {"[]", "- 1 + 1", {"0"}}, + {"[]", "+ 1 + 1", {"2"}}, + + {"[1, 2, 3, 4]", "$[last, last-1, last-2, last-3]", {"4", "3", "2", "1"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestBinaryArithmeticOperationsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"[1, 2, 3]", "$[*] + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + {"[1, 2, 3]", "1 + $[*]", 
C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + {"[1, 2, 3]", "$[*] + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + + {"[1, 2, 3]", "$ + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + {"[1, 2, 3]", "1 + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + {"[1, 2, 3]", "$ + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestParseErrors() { + const TVector<TString> testCases = { + "strict", + "strict smth.key", + "strict $.", + "strict $.$key", + "strict $.28key", + "strict $.ke^y", + "strict $.привет", + "strict $._пока_28_ключ_$_", + " strict $.пока ", + "lax", + "lax smth.key", + "lax $.", + "lax $.$key", + "lax $.28key", + "lax $.ke^y", + "lax $.привет", + "lax $._пока_28_ключ_$_", + " lax $.пока ", + "12.", + "12..3", + "12.3e", + "12.3e++1", + "12.3e--1", + "1e100000000000000000000000000000000", + "true || false", + "1 && (true == true)", + "!true", + "$[*] ? (@.active) . 
id", + "!(1 > 2).type()", + "(null) is unknown", + "(12 * 12) is unknown", + R"($ like_regex "[[[")", + R"($ like_regex "[0-9]+" flag "x")", + "$.first fjrfrfq fqijrhfqiwrjhfqrf qrfqr", + }; + + for (const auto& testCase : testCases) { + RunParseErrorTestCase(testCase); + } + } + + void TestVariables() { + TVector<TVariablesTestCase> testCases = { + {"123", {{"var", "456"}}, "$ + $var", {"579"}}, + {"123", {{"var", "456"}}, "$var", {"456"}}, + {"123", {{"var", R"({"key": [1, 2, 3, 4, 5]})"}}, "$var.key[2 to last]", {"3", "4", "5"}}, + {"123", {{"to", "1"}, {"strict", "2"}}, "$to + $strict", {"3"}}, + }; + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunVariablesTestCase(testCase.Json, testCase.Variables, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDivisionByZero() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"0", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, + {"0.00000000000000000001", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestInfinityResult() { + const double step = 1000000000; + double current = step; + TStringBuilder literal; + TStringBuilder query; + literal << '"' << step; + query << step; + while (!std::isinf(current)) { + query << " * " << step; + literal << "000000000"; + current *= step; + } + literal << '"'; + + const TVector<TRuntimeErrorTestCase> testCases = { + {"0", TString(query), C(TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY)}, + {TString(literal), "$.double()", C(TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestLogicalOperations() { + const 
TVector<TMultiOutputTestCase> testCases = { + // JsonPath does not allow to use boolean literals in boolean operators. + // Here we use their replacements: + // 1. "(1 < true)" for "null" + // 2. "(true == true)" for "true" + // 3. "(true != true)" for "false" + {"1", "(1 < true) || (1 < true)", {"null"}}, + {"1", "(1 < true) || (true != true)", {"null"}}, + {"1", "(1 < true) || (true == true)", {"true"}}, + {"1", "(true != true) || (1 < true)", {"null"}}, + {"1", "(true != true) || (true != true)", {"false"}}, + {"1", "(true != true) || (true == true)", {"true"}}, + {"1", "(true == true) || (1 < true)", {"true"}}, + {"1", "(true == true) || (true != true)", {"true"}}, + {"1", "(true == true) || (true == true)", {"true"}}, + + {"1", "(1 < true) && (1 < true)", {"null"}}, + {"1", "(1 < true) && (true != true)", {"false"}}, + {"1", "(1 < true) && (true == true)", {"null"}}, + {"1", "(true != true) && (1 < true)", {"false"}}, + {"1", "(true != true) && (true != true)", {"false"}}, + {"1", "(true != true) && (true == true)", {"false"}}, + {"1", "(true == true) && (1 < true)", {"null"}}, + {"1", "(true == true) && (true != true)", {"false"}}, + {"1", "(true == true) && (true == true)", {"true"}}, + + {"1", "(true != true) && (true != true) || (true == true)", {"true"}}, + {"1", "(true != true) && ((true != true) || (true == true))", {"false"}}, + {"1", "(true != true) || (true != true) || (true == true)", {"true"}}, + {"1", "(true == true) && (true == true) && (true == true) && (true != true)", {"false"}}, + + {"1", "!(1 < true)", {"null"}}, + {"1", "!(true != true)", {"true"}}, + {"1", "!(true == true)", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestCompareOperations() { + const TVector<TString> operations = {"==", "<", "<=", ">", ">=", "!=", "<>"}; + // All compare operations between null and non-null operands are 
false + for (const auto& op : operations) { + RunTestCase("1", TStringBuilder() << "null " << op << " 1", {"false"}); + RunTestCase("1", TStringBuilder() << "1 " << op << " null", {"false"}); + } + + // If one of the operands is not scalar, comparison results to null + for (const auto& op : operations) { + RunTestCase("[[]]", TStringBuilder() << "$ " << op << " 1", {"null"}); + RunTestCase("[[]]", TStringBuilder() << "1 " << op << " $", {"null"}); + RunTestCase("[[]]", TStringBuilder() << "$ " << op << " $", {"null"}); + + RunTestCase("{}", TStringBuilder() << "$ " << op << " 1", {"null"}); + RunTestCase("{}", TStringBuilder() << "1 " << op << " $", {"null"}); + RunTestCase("{}", TStringBuilder() << "$ " << op << " $", {"null"}); + } + + // If both operands are null, only == is true + for (const auto& op : operations) { + const TString result = op == "==" ? "true" : "false"; + RunTestCase("1", TStringBuilder() << "null " << op << " null", {result}); + } + + const TVector<TMultiOutputTestCase> testCases = { + // Check comparison of numbers + {"1", "1.23 < 4.56", {"true"}}, + {"1", "1.23 > 4.56", {"false"}}, + {"1", "1.23 <= 4.56", {"true"}}, + {"1", "1.23 >= 4.56", {"false"}}, + {"1", "1.23 == 1.23", {"true"}}, + {"1", "1.23 != 1.23", {"false"}}, + {"1", "1.23 <> 4.56", {"true"}}, + {"1", "1.00000000000000000001 == 1.00000000000000000002", {"true"}}, + + // Check numbers of different kinds (int64 vs double) + {"1", "1 < 2.33", {"true"}}, + {"1", "1 > 4.56", {"false"}}, + {"1", "1 <= 4.56", {"true"}}, + {"1", "1 >= 4.56", {"false"}}, + {"1", "1 == 1.23", {"false"}}, + {"1", "1 != 1.23", {"true"}}, + {"1", "1 <> 4.56", {"true"}}, + + // Check comparison of strings + {"1", R"("abc" < "def")", {"true"}}, + {"1", R"("abc" > "def")", {"false"}}, + {"1", R"("abc" <= "def")", {"true"}}, + {"1", R"("abc" >= "def")", {"false"}}, + {"1", R"("abc" == "abc")", {"true"}}, + {"1", R"("abc" != "abc")", {"false"}}, + {"1", R"("abc" <> "def")", {"true"}}, + + // Check comparison of 
UTF8 strings + // First string is U+00e9 (LATIN SMALL LETTER E WITH ACUTE), "é" + // Second string is U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), "é" + {"1", R"("é" < "é")", {"false"}}, + {"1", R"("é" > "é")", {"true"}}, + {"1", R"("привет" == "привет")", {"true"}}, + + // Check cross-product comparison + {R"({ + "left": [1], + "right": [4, 5, 6] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [4, 5, 6], + "right": [1] + })", "$.left[*] < $.right[*]", {"false"}}, + {R"({ + "left": [1, 2, 3], + "right": [4, 5, 6] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [10, 30, 40], + "right": [1, 2, 15] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [10, 30, 40], + "right": [1, 2, 3] + })", "$.left[*] < $.right[*]", {"false"}}, + + // Check incomparable types + {"1", "1 < true", {"null"}}, + {"1", R"(true <> "def")", {"null"}}, + + // Check error in arguments + {R"({ + "array": [1, 2, 3, 4, 5], + "invalid_index": { + "key": 1 + } + })", "$.array[$.invalid_index] < 3", {"null"}}, + {R"({ + "array": [1, 2, 3, 4, 5], + "invalid_index": { + "key": 1 + } + })", "5 >= $.array[$.invalid_index]", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestFilter() { + const TVector<TMultiOutputTestCase> testCases = { + {"[1, 2, 3]", "$[*] ? (@ > 2)", {"3"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18)", {R"({"age":18})", R"({"age":25})", R"({"age":50})"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18) ? (@.age <= 30)", {R"({"age":18})", R"({"age":25})"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18) ? (@.age <= 30) . age", {"18", "25"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? 
(@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18 || @.age <= 30) . age", {"18", "25", "50", "5"}}, + {R"([ + { + "id": 1, + "is_valid": false, + "days_till_doom": 11, + "age_estimation": 4 + }, + { + "id": 2, + "is_valid": true, + "days_till_doom": 5, + "age_estimation": 3 + }, + { + "id": 3, + "is_valid": true, + "days_till_doom": 20, + "age_estimation": 10 + }, + { + "id": 4, + "is_valid": true, + "days_till_doom": 30, + "age_estimation": 2 + } + ])", "$[*] ? (@.is_valid == true && @.days_till_doom > 10 && 2 * @.age_estimation <= 12).id", {"4"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestFilterInvalid() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "@", C(TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestNumericMethods() { + const TVector<TMultiOutputTestCase> testCases = { + {"[-1.23, 4.56, 3, 0]", "$[*].abs()", {"1.23", "4.56", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$[*].floor()", {"-2", "4", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$[*].ceiling()", {"-1", "5", "3", "0"}}, + {"-123.45", "$.ceiling().abs().floor()", {"123"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestNumericMethodsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["1", true, null])", "$[*].abs()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + {R"(["1", true, null])", "$[*].floor()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + {R"(["1", true, null])", 
"$[*].ceiling()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestDoubleMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([ + "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", + "123e3", "123e+3", "1.23e+1", "1.23e1", + "12e0", "12.3e0", "0", "0.0", "0.0e0" + ])", "$[*].double()", { + "123", "123.4", "0.567", "123.4", "0.567", "12.34", + "123000", "123000", "12.3", "12.3", + "12", "12.3", "0", "0", "0", + }}, + {R"("-123.45e1")", "$.double().abs().floor()", {"1234"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDoubleMethodErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["1", true, null])", "$[*].double()", C(TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT)}, + {R"("hi stranger")", "$.double()", C(TIssuesIds::JSONPATH_INVALID_NUMBER_STRING)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestTypeMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {"null", "$.type()", {"\"null\""}}, + {"true", "$.type()", {"\"boolean\""}}, + {"false", "$.type()", {"\"boolean\""}}, + {"1", "$.type()", {"\"number\""}}, + {"-1", "$.type()", {"\"number\""}}, + {"4.56", "$.type()", {"\"number\""}}, + {"-4.56", "$.type()", {"\"number\""}}, + {"\"some string\"", "$.type()", {"\"string\""}}, + {"[]", "$.type()", {"\"array\""}}, + {"[1, 2, 3, 4]", "$.type()", {"\"array\""}}, + {"{}", "$.type()", {"\"object\""}}, + {"{\"key\": 123}", "$.type()", {"\"object\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : 
ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestSizeMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {"null", "$.size()", {"1"}}, + {"true", "$.size()", {"1"}}, + {"false", "$.size()", {"1"}}, + {"1", "$.size()", {"1"}}, + {"-1", "$.size()", {"1"}}, + {"4.56", "$.size()", {"1"}}, + {"-4.56", "$.size()", {"1"}}, + {"\"some string\"", "$.size()", {"1"}}, + {"[]", "$.size()", {"0"}}, + {"[1, 2, 3, 4]", "$.size()", {"4"}}, + {"{}", "$.size()", {"1"}}, + {"{\"key\": 123}", "$.size()", {"1"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "one": 1, + "two": 2, + "three": 3 + })", "$.keyvalue()", { + R"({"name":"one","value":1})", + R"({"name":"three","value":3})", + R"({"name":"two","value":2})", + }}, + {R"({ + "one": "string", + "two": [1, 2, 3, 4], + "three": [4, 5] + })", R"($.keyvalue() ? 
(@.value.type() == "array" && @.value.size() > 2).name)", {"\"two\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethodErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"\"string\"", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, + {"[1, 2, 3, 4]", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {"1", R"("some string" starts with "some")", {"true"}}, + {"1", R"("some string" starts with "string")", {"false"}}, + {R"(["some string", "string"])", R"($[*] ? (@ starts with "string"))", {"\"string\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicateErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["first", "second"])", R"($[*] starts with "first")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, + {"1", R"(1 starts with "string")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestExistsPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "key": 123 + })", "exists ($.key)", {"true"}}, + {"\"string\"", "exists ($ * 2)", {"null"}}, + {R"(["some string", 2])", "$[*] ? 
(exists (@ * 2))", {"2"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestIsUnknownPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {"1", "(1 < true) is unknown", {"true"}}, + {"1", "(true == true) is unknown", {"false"}}, + {"1", "(true == false) is unknown", {"false"}}, + {R"(["some string", -20])", "$[*] ? ((1 < @) is unknown)", {"\"some string\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["string", "123", "456"])", R"($[*] like_regex "[0-9]+")", {"true"}}, + {R"(["string", "another string"])", R"($[*] like_regex "[0-9]+")", {"false"}}, + + // Case insensitive flag + {R"("AbCd")", R"($ like_regex "abcd")", {"false"}}, + {R"("AbCd")", R"($ like_regex "abcd" flag "i")", {"true"}}, + + {R"(["string", "123", "456"])", R"($[*] ? 
(@ like_regex "[0-9]+"))", {"\"123\"", "\"456\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathCommonTest); diff --git a/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp b/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp new file mode 100644 index 0000000000..3b964e28b5 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp @@ -0,0 +1,81 @@ +#include "test_base.h" + +/* + These examples are taken from [ISO/IEC TR 19075-6:2017] standard (https://www.iso.org/standard/67367.html) +*/ + +class TJsonPathExamplesTest : public TJsonPathTestBase { +public: + TJsonPathExamplesTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathExamplesTest); + UNIT_TEST(TestMemberAccessExamples); + UNIT_TEST(TestElementAccessExamples); + UNIT_TEST(TestFilterExamples); + UNIT_TEST_SUITE_END(); + + void TestMemberAccessExamples() { + TString input = R"({ + "phones": [ + {"type": "cell", "number": "abc-defg"}, + {"number": "pqr-wxyz"}, + {"type": "home", "number": "hij-klmn"} + ] + })"; + + RunTestCase(input, "lax $.phones.type", {"\"cell\"", "\"home\""}); + RunRuntimeErrorTestCase(input, "strict $.phones[*].type", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)); + // NOTE: Example in standard has different order of elements. This is okay because order of elements after + // wildcard member access is implementation-defined + RunTestCase(input, "lax $.phones.*", {"\"abc-defg\"", "\"cell\"", "\"pqr-wxyz\"", "\"hij-klmn\"", "\"home\""}); + } + + void TestElementAccessExamples() { + // NOTE: Example in standard has different order of elements. 
This is okay because order of elements after + // wildcard member access is implementation-defined + RunTestCase(R"({ + "sensors": { + "SF": [10, 11, 12, 13, 15, 16, 17], + "FC": [20, 22, 24], + "SJ": [30, 33] + } + })", "lax $.sensors.*[0, last, 2]", {"20", "24", "24", "10", "17", "12", "30", "33"}); + + RunTestCase(R"({ + "x": [12, 30], + "y": [8], + "z": ["a", "b", "c"] + })", "lax $.*[1 to last]", {"30", "\"b\"", "\"c\""}); + } + + void TestFilterExamples() { + RunParseErrorTestCase("$ ? (@.skilled)"); + + TString json = R"({"name":"Portia","skilled":true})"; + RunTestCase(json, "$ ? (@.skilled == true)", {json}); + + // Standard also mentions this example in lax mode. It is invalid because + // in this case automatic unwrapping on arrays before filters will be performed + // and query will finish with error + RunTestCase(R"({ + "x": [1, "one"] + })", "strict $.x ? (2 > @[*])", {}); + + RunTestCase(R"({ + "name": { + "first": "Manny", + "last": "Moe" + }, + "points": 123 + })", "strict $ ? (exists (@.name)).name", {R"({"first":"Manny","last":"Moe"})"}); + + RunTestCase(R"({ + "points": 41 + })", "strict $ ? (exists (@.name)).name", {}); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathExamplesTest);
\ No newline at end of file diff --git a/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp b/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp new file mode 100644 index 0000000000..4d5dda83ac --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp @@ -0,0 +1,283 @@ +#include "test_base.h" + +class TJsonPathLaxTest : public TJsonPathTestBase { +public: + TJsonPathLaxTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathLaxTest); + UNIT_TEST(TestArrayUnwrap); + UNIT_TEST(TestArrayWrap); + UNIT_TEST(TestInvalidArrayIndices); + UNIT_TEST(TestStructuralErrorsHandling); + UNIT_TEST(TestCompareOperations); + UNIT_TEST(TestFilter); + UNIT_TEST(TestNumericMethods); + UNIT_TEST(TestDoubleMethod); + UNIT_TEST(TestKeyValueMethod); + UNIT_TEST(TestExistsPredicate); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST_SUITE_END(); + + void TestArrayUnwrap() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.key", {"1", "2"}}, + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.*", {"1", "2"}}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.key", {"1"}}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.*", {"1"}}, + {R"({"another_key": 123})", "$.key", {}}, + {R"([ + {"key": [{"nested": 28}]}, + {"key": [{"nested": 29}]} + ])", "$.key.nested", {"28", "29"}}, + {R"([ + {"key": [{"nested": 28}]}, + {"key": [{"nested": 29}]} + ])", "$.*.*", {"28", "29"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestArrayWrap() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2])", "$[*][0]", {"1", "2"}}, + {R"([[1], 2, [3]])", "$[*][0]", {"1", "2", "3"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void 
TestInvalidArrayIndices() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx]", {}}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", {}}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", {}}, + {R"([1, 2, 3, 4, 5])", "$[3 to 0]", {}}, + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx, 1 to 2]", {"2", "3"}}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[0, $.from to $.to, 2 to 2]", {"1", "3"}}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[0, $.from to $.to, 1 to 1]", {"1", "2"}}, + {R"([1, 2, 3, 4, 5])", "$[0, 3 to 0, 1]", {"1", "2"}}, + {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", {"5"}}, + {"[]", "$[last]", {}}, + {"[]", "$[last to 0]", {}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStructuralErrorsHandling() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([[{"key": 1}]])", "$.key", {}}, + {R"([[{"key": 1}]])", "$.*", {}}, + {R"([ + {"key": 1}, + {"not_key": 2}, + {"key": 3} + ])", "$[*].key", {"1", "3"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestCompareOperations() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"({ + "left": [1, 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"true"}}, + // Check incomparable types + // NOTE: Even though values of types string and number are incomparable, + // pair 1 < 4 is true and was found first, so the overall result is true + {R"({ + "left": [1, 2, "string"], + "right": [4, 5, 6] + })", "$.left < $.right", {"true"}}, + // NOTE: In this example pair "string" < 4 results in error and was found 
first, + // so overall result is null + {R"({ + "left": ["string", 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestFilter() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$ ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestNumericMethods() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {"[-1.23, 4.56, 3, 0]", "$.abs()", {"1.23", "4.56", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$.floor()", {"-2", "4", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$.ceiling()", {"-1", "5", "3", "0"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDoubleMethod() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([ + "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", + "123e3", "123e+3", "1.23e+1", "1.23e1", + "12e0", "12.3e0", "0", "0.0", "0.0e0" + ])", "$.double()", { + "123", "123.4", "0.567", "123.4", "0.567", "12.34", + "123000", "123000", "12.3", "12.3", + "12", "12.3", "0", "0", "0", + }}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethod() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([{ + "one": 1, + "two": 2, + "three": 3 + }])", "$.keyvalue().name", {"\"one\"", "\"three\"", "\"two\""}}, + }; + + for (const auto& testCase : 
testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestExistsPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "key": 123 + })", "exists ($.another_key)", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrapping + {R"(["string", "123", "456"])", R"($ like_regex "[0-9]+")", {"true"}}, + + // Check early stopping + {R"([123, "123", "456"])", R"($ like_regex "[0-9]+")", {"null"}}, + {R"(["123", "456", 123])", R"($ like_regex "[0-9]+")", {"true"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, + {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"true"}}, + {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, + {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathLaxTest);
\ No newline at end of file diff --git a/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp b/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp new file mode 100644 index 0000000000..cea2d8577b --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp @@ -0,0 +1,23 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h> +#include <util/system/platform.h> + +/* + * Paranoid test to check correct regexp library is used + */ + +namespace NYql::NJsonPath { + +extern ui32 GetReLibId(); + +Y_UNIT_TEST_SUITE(RegexpLib) { + Y_UNIT_TEST(DefaultLib) { +#ifdef __x86_64__ + UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kHyperscan); +#else + UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kRe2); +#endif + } +} + +} diff --git a/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp b/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp new file mode 100644 index 0000000000..c8414581e4 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp @@ -0,0 +1,118 @@ +#include "test_base.h" + +class TJsonPathStrictTest : public TJsonPathTestBase { +public: + TJsonPathStrictTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathStrictTest); + UNIT_TEST(TestRuntimeErrors); + UNIT_TEST(TestIncomparableTypes); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST_SUITE_END(); + + void TestRuntimeErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({"another_key": 123})", "$.key", 
C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)}, + {R"([1, 2])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, + {R"([[1], 2, [3]])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": -20, + "to": -10, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"([1, 2, 3, 4, 5])", "$[3 to 0]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE)}, + {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {"[]", "$[last]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {"[]", "$[last to 0]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestIncomparableTypes() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "left": [1, 2, "string"], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + {R"({ + "left": ["string", 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["123", 123])", R"($[*] like_regex "[0-9]+")", {"null"}}, + {R"([123, "123"])", R"($[*] like_regex "[0-9]+")", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : 
STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, + {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"null"}}, + {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, + {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathStrictTest);
\ No newline at end of file diff --git a/yql/essentials/minikql/jsonpath/ut/test_base.cpp b/yql/essentials/minikql/jsonpath/ut/test_base.cpp new file mode 100644 index 0000000000..feceecddb1 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/test_base.cpp @@ -0,0 +1,167 @@ +#include "test_base.h" + +#include <yql/essentials/types/binary_json/write.h> + +using namespace NKikimr::NBinaryJson; + +TJsonPathTestBase::TJsonPathTestBase() + : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())) + , Alloc(__LOCATION__) + , Env(Alloc) + , MemInfo("Memory") + , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()) + , ValueBuilder(HolderFactory) +{ +} + +TIssueCode TJsonPathTestBase::C(TIssuesIds::EIssueCode code) { + return static_cast<TIssueCode>(code); +} + +TUnboxedValue TJsonPathTestBase::ParseJson(TStringBuf raw) { + return TryParseJsonDom(raw, &ValueBuilder); +} + +void TJsonPathTestBase::RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson);; + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + for (const auto& json : {unboxedValueJson, binaryJsonRoot}) { + const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); + UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); + + const auto& nodes = result.GetNodes(); + UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); + for (size_t i = 0; i < nodes.size(); i++) { + const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder); + UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); + } + } + } catch (...) 
{ + TStringBuilder message; + message << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected output:"; + for (const auto& item : expectedResult) { + message << " " << item; + } + message << Endl; + + UNIT_FAIL(message); + } +} + +void TJsonPathTestBase::RunParseErrorTestCase(const TString& rawJsonPath) { + try { + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, 2); + UNIT_ASSERT_C(!issues.Empty(), "Expected parse errors"); + } catch (...) { + UNIT_FAIL( + "Exception: " << CurrentExceptionMessage() << Endl + << "Jsonpath: " << rawJsonPath << Endl + ); + } +} + +void TJsonPathTestBase::RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson); + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + for (const auto& json : {unboxedValueJson, binaryJsonRoot}) { + const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); + UNIT_ASSERT_C(result.IsError(), "Expected runtime error"); + UNIT_ASSERT_VALUES_EQUAL(result.GetError().GetCode(), error); + } + } catch (...) 
{ + UNIT_FAIL( + TStringBuilder() + << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected error: " << error << Endl + ); + } +} + +void TJsonPathTestBase::RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson); + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TVariablesMap unboxedValueVariables; + for (const auto& it : variables) { + unboxedValueVariables[it.first] = TValue(ParseJson(it.second)); + } + + TVariablesMap binaryJsonVariables; + TVector<TBinaryJson> storage; + TVector<TBinaryJsonReaderPtr> readers; + storage.reserve(variables.size()); + readers.reserve(variables.size()); + for (const auto& it : variables) { + storage.push_back(*SerializeToBinaryJson(it.second)); + readers.push_back(TBinaryJsonReader::Make(storage.back())); + binaryJsonVariables[it.first] = TValue(readers.back()->GetRootCursor()); + } + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + TVector<std::pair<TValue, TVariablesMap>> testCases = { + {unboxedValueJson, unboxedValueVariables}, + {binaryJsonRoot, binaryJsonVariables}, + }; + for (const auto& testCase : testCases) { + const auto result = ExecuteJsonPath(jsonPath, testCase.first, testCase.second, &ValueBuilder); + UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); + + const auto& nodes = result.GetNodes(); + UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); + for (size_t i = 0; i < nodes.size(); i++) { + const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder); + 
UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); + } + } + } catch (...) { + TStringBuilder message; + message << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Variables:" << Endl; + for (const auto& it : variables) { + message << "\t" << it.first << " = " << it.second; + } + + message << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected output:"; + for (const auto& item : expectedResult) { + message << " " << item; + } + message << Endl; + + UNIT_FAIL(message); + } +} + diff --git a/yql/essentials/minikql/jsonpath/ut/test_base.h b/yql/essentials/minikql/jsonpath/ut/test_base.h new file mode 100644 index 0000000000..59e654f290 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/test_base.h @@ -0,0 +1,75 @@ +#pragma once + +#include <yql/essentials/core/issue/protos/issue_id.pb.h> +#include <yql/essentials/minikql/jsonpath/jsonpath.h> +#include <yql/essentials/minikql/dom/json.h> + +#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h> +#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <contrib/ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> +#include <contrib/ydb/library/yql/minikql/mkql_mem_info.h> +#include <contrib/ydb/library/yql/minikql/mkql_function_registry.h> +#include <contrib/ydb/library/yql/minikql/mkql_alloc.h> +#include <contrib/ydb/library/yql/minikql/mkql_node.h> + +#include <library/cpp/json/json_reader.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/yexception.h> + +using namespace NYql; +using namespace NYql::NDom; +using namespace NYql::NUdf; +using namespace NYql::NJsonPath; +using namespace NJson; +using namespace NKikimr::NMiniKQL; + +class TJsonPathTestBase: public TTestBase { +public: + TJsonPathTestBase(); + +protected: + const TVector<TStringBuf> LAX_MODES = {"", "lax "}; + const TVector<TStringBuf> STRICT_MODES = {"strict "}; + const 
TVector<TStringBuf> ALL_MODES = {"", "lax ", "strict "}; + + TIntrusivePtr<IFunctionRegistry> FunctionRegistry; + TScopedAlloc Alloc; + TTypeEnvironment Env; + TMemoryUsageInfo MemInfo; + THolderFactory HolderFactory; + TDefaultValueBuilder ValueBuilder; + + const int MAX_PARSE_ERRORS = 100; + + TIssueCode C(TIssuesIds::EIssueCode code); + + TUnboxedValue ParseJson(TStringBuf raw); + + struct TMultiOutputTestCase { + TString Json; + TString JsonPath; + TVector<TString> Result; + }; + + void RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult); + + void RunParseErrorTestCase(const TString& rawJsonPath); + + struct TRuntimeErrorTestCase { + TString Json; + TString JsonPath; + TIssueCode Error; + }; + + void RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error); + + struct TVariablesTestCase { + TString Json; + THashMap<TStringBuf, TStringBuf> Variables; + TString JsonPath; + TVector<TString> Result; + }; + + void RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult); +}; diff --git a/yql/essentials/minikql/jsonpath/ut/ya.make b/yql/essentials/minikql/jsonpath/ut/ya.make new file mode 100644 index 0000000000..0da935241b --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ut/ya.make @@ -0,0 +1,28 @@ +UNITTEST_FOR(yql/essentials/minikql/jsonpath) + + + +SRCS( + common_ut.cpp + examples_ut.cpp + lax_ut.cpp + strict_ut.cpp + test_base.cpp + lib_id_ut.cpp +) + +PEERDIR( + library/cpp/json + yql/essentials/types/binary_json + contrib/ydb/library/yql/minikql + contrib/ydb/library/yql/minikql/computation/llvm14 + yql/essentials/minikql/dom + contrib/ydb/library/yql/minikql/invoke_builtins/llvm14 + yql/essentials/public/udf/service/exception_policy + yql/essentials/core/issue/protos + contrib/ydb/library/yql/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +END() diff --git 
a/yql/essentials/minikql/jsonpath/value.cpp b/yql/essentials/minikql/jsonpath/value.cpp new file mode 100644 index 0000000000..356543baf8 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/value.cpp @@ -0,0 +1,383 @@ +#include "value.h" + +#include <yql/essentials/minikql/dom/node.h> + +namespace NYql::NJsonPath { + +using namespace NUdf; +using namespace NDom; +using namespace NKikimr; +using namespace NKikimr::NBinaryJson; + +TArrayIterator::TArrayIterator() + : Iterator(TEmptyMarker()) +{ +} + +TArrayIterator::TArrayIterator(const TUnboxedValue& iterator) + : Iterator(iterator) +{ +} + +TArrayIterator::TArrayIterator(TUnboxedValue&& iterator) + : Iterator(std::move(iterator)) +{ +} + +TArrayIterator::TArrayIterator(const NBinaryJson::TArrayIterator& iterator) + : Iterator(iterator) +{ +} + +TArrayIterator::TArrayIterator(NBinaryJson::TArrayIterator&& iterator) + : Iterator(std::move(iterator)) +{ +} + +bool TArrayIterator::Next(TValue& value) { + if (std::holds_alternative<TEmptyMarker>(Iterator)) { + return false; + } else if (auto* iterator = std::get_if<NBinaryJson::TArrayIterator>(&Iterator)) { + if (!iterator->HasNext()) { + return false; + } + value = TValue(iterator->Next()); + return true; + } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) { + TUnboxedValue result; + const bool success = iterator->Next(result); + if (success) { + value = TValue(result); + } + return success; + } else { + Y_ABORT("Unexpected variant case in Next"); + } +} + +TObjectIterator::TObjectIterator() + : Iterator(TEmptyMarker()) +{ +} + +TObjectIterator::TObjectIterator(const TUnboxedValue& iterator) + : Iterator(iterator) +{ +} + +TObjectIterator::TObjectIterator(TUnboxedValue&& iterator) + : Iterator(std::move(iterator)) +{ +} + +TObjectIterator::TObjectIterator(const NBinaryJson::TObjectIterator& iterator) + : Iterator(iterator) +{ +} + +TObjectIterator::TObjectIterator(NBinaryJson::TObjectIterator&& iterator) + : Iterator(std::move(iterator)) +{ +} + +bool 
TObjectIterator::Next(TValue& key, TValue& value) { + if (std::holds_alternative<TEmptyMarker>(Iterator)) { + return false; + } else if (auto* iterator = std::get_if<NBinaryJson::TObjectIterator>(&Iterator)) { + if (!iterator->HasNext()) { + return false; + } + const auto [itKey, itValue] = iterator->Next(); + key = TValue(itKey); + value = TValue(itValue); + return true; + } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) { + TUnboxedValue itKey; + TUnboxedValue itValue; + const bool success = iterator->NextPair(itKey, itValue); + if (success) { + key = TValue(itKey); + value = TValue(itValue); + } + return success; + } else { + Y_ABORT("Unexpected variant case in Next"); + } +} + +TValue::TValue() + : Value(MakeEntity()) +{ +} + +TValue::TValue(const TUnboxedValue& value) + : Value(value) +{ +} + +TValue::TValue(TUnboxedValue&& value) + : Value(std::move(value)) +{ +} + +TValue::TValue(const TEntryCursor& value) + : Value(value) +{ + UnpackInnerValue(); +} + +TValue::TValue(TEntryCursor&& value) + : Value(std::move(value)) +{ + UnpackInnerValue(); +} + +TValue::TValue(const TContainerCursor& value) + : Value(value) +{ + UnpackInnerValue(); +} + +TValue::TValue(TContainerCursor&& value) + : Value(std::move(value)) +{ + UnpackInnerValue(); +} + +EValueType TValue::GetType() const { + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + switch (value->GetType()) { + case EEntryType::BoolFalse: + case EEntryType::BoolTrue: + return EValueType::Bool; + case EEntryType::Null: + return EValueType::Null; + case EEntryType::Number: + return EValueType::Number; + case EEntryType::String: + return EValueType::String; + case EEntryType::Container: + Y_ABORT("Logical error: TEntryCursor with Container type must be converted to TContainerCursor"); + } + } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + switch (value->GetType()) { + case EContainerType::Array: + return EValueType::Array; + case EContainerType::Object: + return 
EValueType::Object; + case EContainerType::TopLevelScalar: + Y_ABORT("Logical error: TContainerCursor with TopLevelScalar type must be converted to TEntryCursor"); + } + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + switch (GetNodeType(*value)) { + case ENodeType::Bool: + return EValueType::Bool; + case ENodeType::Double: + case ENodeType::Int64: + case ENodeType::Uint64: + return EValueType::Number; + case ENodeType::Dict: + case ENodeType::Attr: + return EValueType::Object; + case ENodeType::List: + return EValueType::Array; + case ENodeType::String: + return EValueType::String; + case ENodeType::Entity: + return EValueType::Null; + } + } else { + Y_ABORT("Unexpected variant case in GetType"); + } +} + +bool TValue::Is(EValueType type) const { + return GetType() == type; +} + +bool TValue::IsBool() const { + return Is(EValueType::Bool); +} + +bool TValue::IsNumber() const { + return Is(EValueType::Number); +} + +bool TValue::IsString() const { + return Is(EValueType::String); +} + +bool TValue::IsNull() const { + return Is(EValueType::Null); +} + +bool TValue::IsObject() const { + return Is(EValueType::Object); +} + +bool TValue::IsArray() const { + return Is(EValueType::Array); +} + +double TValue::GetNumber() const { + Y_DEBUG_ABORT_UNLESS(IsNumber()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetNumber(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (IsNodeType(*value, ENodeType::Double)) { + return value->Get<double>(); + } else if (IsNodeType(*value, ENodeType::Int64)) { + return static_cast<double>(value->Get<i64>()); + } else { + return static_cast<double>(value->Get<ui64>()); + } + } else { + Y_ABORT("Unexpected variant case in GetNumber"); + } +} + +bool TValue::GetBool() const { + Y_DEBUG_ABORT_UNLESS(IsBool()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetType() == EEntryType::BoolTrue; + } else if (const auto* value = 
std::get_if<TUnboxedValue>(&Value)) { + return value->Get<bool>(); + } else { + Y_ABORT("Unexpected variant case in GetBool"); + } +} + +const TStringBuf TValue::GetString() const { + Y_DEBUG_ABORT_UNLESS(IsString()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetString(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return value->AsStringRef(); + } else { + Y_ABORT("Unexpected variant case in GetString"); + } +} + +ui32 TValue::GetSize() const { + Y_DEBUG_ABORT_UNLESS(IsArray() || IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return value->GetSize(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return 0; + } + + if (IsNodeType(*value, ENodeType::List)) { + return value->GetListLength(); + } else { + return value->GetDictLength(); + } + } else { + Y_ABORT("Unexpected variant case in GetSize"); + } +} + +TValue TValue::GetElement(ui32 index) const { + Y_DEBUG_ABORT_UNLESS(IsArray()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TValue(value->GetElement(index)); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return TValue(value->Lookup(TUnboxedValuePod(index))); + } else { + Y_ABORT("Unexpected variant case in GetElement"); + } +} + +TArrayIterator TValue::GetArrayIterator() const { + Y_DEBUG_ABORT_UNLESS(IsArray()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TArrayIterator(value->GetArrayIterator()); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return TArrayIterator(); + } + return TArrayIterator(value->GetListIterator()); + } else { + Y_ABORT("Unexpected variant case in GetArrayIterator"); + } +} + +TMaybe<TValue> TValue::Lookup(const TStringBuf key) const { + Y_DEBUG_ABORT_UNLESS(IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) 
{ + const auto payload = value->Lookup(key); + if (!payload.Defined()) { + return Nothing(); + } + return TValue(*payload); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return Nothing(); + } + + // Lookup on TUnboxedValue can be performed only with TUnboxedValue key. + // To avoid allocating new string we use our custom Lookup method defined + // on underlying TMapNode that accepts TStringRef + const auto* dict = static_cast<const TMapNode*>(value->AsBoxed().Get()); + if (const auto payload = dict->Lookup(key)) { + return {TValue(payload)}; + } else { + return Nothing(); + } + } else { + Y_ABORT("Unexpected variant case in Lookup"); + } +} + +TObjectIterator TValue::GetObjectIterator() const { + Y_DEBUG_ABORT_UNLESS(IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TObjectIterator(value->GetObjectIterator()); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return TObjectIterator(); + } + return TObjectIterator(value->GetDictIterator()); + } else { + Y_ABORT("Unexpected variant case in GetObjectIterator"); + } +} + +TUnboxedValue TValue::ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const { + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return ReadElementToJsonDom(*value, valueBuilder); + } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return ReadContainerToJsonDom(*value, valueBuilder); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return *value; + } else { + Y_ABORT("Unexpected variant case in ConvertToUnboxedValue"); + } +} + +void TValue::UnpackInnerValue() { + // If TEntryCursor points to container, we need to extract TContainerCursor + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + if (value->GetType() == EEntryType::Container) { + Value = value->GetContainer(); + } + } + + // If TContainerCursor points to 
top level scalar, we need to extract TEntryCursor + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + if (value->GetType() == EContainerType::TopLevelScalar) { + Value = value->GetElement(0); + } + } +} + +} + diff --git a/yql/essentials/minikql/jsonpath/value.h b/yql/essentials/minikql/jsonpath/value.h new file mode 100644 index 0000000000..ca663ad5c4 --- /dev/null +++ b/yql/essentials/minikql/jsonpath/value.h @@ -0,0 +1,101 @@ +#pragma once + +#include <yql/essentials/types/binary_json/read.h> + +#include <yql/essentials/public/udf/udf_value.h> + +#include <util/generic/maybe.h> + +#include <variant> + +namespace NYql::NJsonPath { + +enum class EValueType { + Bool = 0, + Number = 1, + String = 2, + Null = 4, + Object = 5, + Array = 6, +}; + +struct TEmptyMarker { +}; + +class TValue; + +class TArrayIterator { +public: + TArrayIterator(); + explicit TArrayIterator(const NUdf::TUnboxedValue& iterator); + explicit TArrayIterator(NUdf::TUnboxedValue&& iterator); + + explicit TArrayIterator(const NKikimr::NBinaryJson::TArrayIterator& iterator); + explicit TArrayIterator(NKikimr::NBinaryJson::TArrayIterator&& iterator); + + bool Next(TValue& value); + +private: + std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TArrayIterator> Iterator; +}; + +class TObjectIterator { +public: + TObjectIterator(); + explicit TObjectIterator(const NUdf::TUnboxedValue& iterator); + explicit TObjectIterator(NUdf::TUnboxedValue&& iterator); + + explicit TObjectIterator(const NKikimr::NBinaryJson::TObjectIterator& iterator); + explicit TObjectIterator(NKikimr::NBinaryJson::TObjectIterator&& iterator); + + bool Next(TValue& key, TValue& value); + +private: + std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TObjectIterator> Iterator; +}; + +class TValue { +public: + TValue(); + explicit TValue(const NUdf::TUnboxedValue& value); + explicit TValue(NUdf::TUnboxedValue&& value); + + explicit TValue(const NKikimr::NBinaryJson::TEntryCursor& 
value); + explicit TValue(NKikimr::NBinaryJson::TEntryCursor&& value); + + explicit TValue(const NKikimr::NBinaryJson::TContainerCursor& value); + explicit TValue(NKikimr::NBinaryJson::TContainerCursor&& value); + + EValueType GetType() const; + bool Is(EValueType type) const; + bool IsBool() const; + bool IsNumber() const; + bool IsString() const; + bool IsNull() const; + bool IsObject() const; + bool IsArray() const; + + // Scalar value methods + double GetNumber() const; + bool GetBool() const; + const TStringBuf GetString() const; + + ui32 GetSize() const; + + // Array methods + TValue GetElement(ui32 index) const; + TArrayIterator GetArrayIterator() const; + + // Object methods + TMaybe<TValue> Lookup(const TStringBuf key) const; + TObjectIterator GetObjectIterator() const; + + NUdf::TUnboxedValue ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const; + +private: + void UnpackInnerValue(); + + std::variant<NUdf::TUnboxedValue, NKikimr::NBinaryJson::TEntryCursor, NKikimr::NBinaryJson::TContainerCursor> Value; +}; + +} diff --git a/yql/essentials/minikql/jsonpath/ya.make b/yql/essentials/minikql/jsonpath/ya.make new file mode 100644 index 0000000000..3ae29b36cf --- /dev/null +++ b/yql/essentials/minikql/jsonpath/ya.make @@ -0,0 +1,63 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 27 + 0 +) + +IF (ARCH_X86_64) + CFLAGS( + -DYDB_REWRAPPER_LIB_ID=kHyperscan + ) + + PEERDIR( + yql/essentials/minikql/jsonpath/rewrapper/hyperscan + ) + +ELSE() + CFLAGS( + -DYDB_REWRAPPER_LIB_ID=kRe2 + ) + +ENDIF() + +PEERDIR( + contrib/libs/double-conversion + library/cpp/json + yql/essentials/minikql/jsonpath/rewrapper/re2 + yql/essentials/minikql/jsonpath/rewrapper + yql/essentials/types/binary_json + yql/essentials/minikql/dom + yql/essentials/public/issue + yql/essentials/public/udf + yql/essentials/ast + yql/essentials/utils + yql/essentials/core/issue/protos + yql/essentials/parser/proto_ast/antlr3 + yql/essentials/parser/proto_ast/gen/jsonpath +) + +SRCS( + 
ast_builder.cpp + ast_nodes.cpp + binary.cpp + executor.cpp + jsonpath.cpp + parse_double.cpp + type_check.cpp + value.cpp +) + +GENERATE_ENUM_SERIALIZATION(ast_nodes.h) + +END() + +RECURSE( + benchmark + rewrapper +) + +RECURSE_FOR_TESTS( + ut +) diff --git a/yql/essentials/minikql/ya.make b/yql/essentials/minikql/ya.make new file mode 100644 index 0000000000..e45eb4a543 --- /dev/null +++ b/yql/essentials/minikql/ya.make @@ -0,0 +1,6 @@ +RECURSE( + dom + jsonpath +) + + |