path: root/yql/essentials/minikql
diff options
author vvvv <vvvv@yandex-team.com> 2024-11-06 23:54:28 +0300
committer vvvv <vvvv@yandex-team.com> 2024-11-07 00:04:25 +0300
commit cf2a23963ac10add28c50cc114fbf48953eca5aa (patch)
tree 174b849b8ecfa96b0c8e4409ab3287721a9210c8 /yql/essentials/minikql
parent 3a3113a2bf5a7fab32bde414932082b264c559fc (diff)
Prepare move yql/minikql YQL-19206
types,jsonpath,dom commit_hash:6b54be5968b6a30b6d97fe3a1611574bcefc749e
Diffstat (limited to 'yql/essentials/minikql')
58 files changed, 13123 insertions, 0 deletions
diff --git a/yql/essentials/minikql/dom/convert.h b/yql/essentials/minikql/dom/convert.h
new file mode 100644
index 0000000000..e562d0381f
--- /dev/null
+++ b/yql/essentials/minikql/dom/convert.h
@@ -0,0 +1,388 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/utils/utf8.h>
+#include <util/string/escape.h>
+#include <util/string/cast.h>
+#include <util/string/builder.h>
+#include <functional>
+namespace NYql::NDom {
+// Converts a DOM node to a boolean value.
+// Strict: a node that cannot be converted terminates the UDF (see the UdfTerminate call at the end)
+// instead of producing an empty value.
+// AutoConvert: non-boolean nodes are coerced - non-empty strings, non-zero numbers and non-empty
+// lists/dicts become true, entities become false. AutoConvert takes precedence over Strict.
+// The strings "true" and "false" are recognised in every mode; Attr nodes are unwrapped recursively.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod ConvertToBool(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::Bool:
+ return TUnboxedValuePod(x.Get<bool>());
+ case ENodeType::String:
+ if (const std::string_view str = x.AsStringRef(); str == "true")
+ return TUnboxedValuePod(true);
+ else if (str == "false")
+ return TUnboxedValuePod(false);
+ else if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.AsStringRef().Size() > 0U);
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Uint64:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.Get<ui64>() != 0ULL);
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Int64:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.Get<i64>() != 0LL);
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Double:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.Get<double>() != 0.);
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Entity:
+ // The second, unreachable AutoConvert test that used to follow the Strict branch was removed.
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(false);
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::List:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.IsBoxed() && x.HasListItems());
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Dict:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.IsBoxed() && x.HasDictItems());
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Attr:
+ return ConvertToBool<Strict, AutoConvert>(x.GetVariantItem().Release(), valueBuilder, pos);
+ }
+ // Reached only through the `break` statements of the Strict branches above.
+ UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse boolean value from " << TDebugPrinter(x)).c_str());
+template<typename TDst, typename TSrc> // InBounds: reports whether the value v of type TSrc passes the TDst limit checks selected below.
+constexpr inline bool InBounds(const TSrc v) {
+ if constexpr (std::is_same<TSrc, TDst>()) // identical types: the value always fits
+ return true;
+ if constexpr (sizeof(TSrc) > sizeof(TDst)) // source type is wider than the destination
+ if constexpr (std::is_signed<TSrc>())
+ return v <= TSrc(std::numeric_limits<TDst>::max()) && v >= TSrc(std::numeric_limits<TDst>::min()); // signed source: both limits are checked
+ else
+ return v <= TSrc(std::numeric_limits<TDst>::max()); // unsigned source: only the upper limit is checked
+ else // not wider; together with the static_assert below this means the sizes are equal
+ if constexpr (std::is_signed<TSrc>())
+ return v >= TSrc(std::numeric_limits<TDst>::min()); // signed source: only the lower limit is checked
+ else
+ return v <= TSrc(std::numeric_limits<TDst>::max()); // unsigned source: only the upper limit is checked
+ static_assert(sizeof(TSrc) >= sizeof(TDst), "Expects wide to short."); // the source type must not be narrower than the destination
+template<bool Strict, bool AutoConvert, typename TargetType> // ConvertToIntegral: converts a DOM node to the integral TargetType. AutoConvert coerces every node kind; otherwise only Int64/Uint64 nodes accepted by InBounds convert, and a failure either terminates (Strict) or yields an empty value.
+TUnboxedValuePod ConvertToIntegral(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::Int64: {
+ const auto s = x.Get<i64>();
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(s)); // plain cast: no range check in AutoConvert mode
+ else if (InBounds<TargetType>(s)) // run-time range check
+ return TUnboxedValuePod(TargetType(s));
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ }
+ case ENodeType::Uint64: {
+ const auto u = x.Get<ui64>();
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(u)); // plain cast: no range check in AutoConvert mode
+ else if (InBounds<TargetType>(u)) // run-time range check
+ return TUnboxedValuePod(TargetType(u));
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ }
+ case ENodeType::Bool:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(x.Get<bool>() ? 1 : 0)); // true -> 1, false -> 0
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Double:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(x.Get<double>())); // NOTE(review): the double is cast without a range check - confirm callers never pass out-of-range values
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::String:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(FromStringWithDefault(std::string_view(x.AsStringRef()), TargetType(0))); // 0 is the default passed to FromStringWithDefault
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Entity:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod::Zero(); // entities convert to zero
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::List:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod::Zero(); // lists convert to zero regardless of content
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Dict:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod::Zero(); // dicts convert to zero regardless of content
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Attr:
+ return ConvertToIntegral<Strict, AutoConvert, TargetType>(x.GetVariantItem().Release(), valueBuilder, pos); // unwrap the attributed node and convert its value
+ }
+ UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse integer value from " << TDebugPrinter(x)).c_str()); // reached only through the `break` statements above
+ static_assert(std::is_integral<TargetType>(), "Expect integral.");
+template<bool Strict, bool AutoConvert, typename TargetType> // ConvertToFloat: converts a DOM node to the floating point TargetType. Numeric nodes always convert; other kinds convert only with AutoConvert, otherwise the call terminates (Strict) or yields an empty value.
+TUnboxedValuePod ConvertToFloat(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::Double: // the three numeric kinds are cast unconditionally, whatever the flags are
+ return TUnboxedValuePod(TargetType(x.Get<double>()));
+ case ENodeType::Uint64:
+ return TUnboxedValuePod(TargetType(x.Get<ui64>()));
+ case ENodeType::Int64:
+ return TUnboxedValuePod(TargetType(x.Get<i64>()));
+ case ENodeType::Bool:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(x.Get<bool>() ? TargetType(1) : TargetType(0)); // true -> 1, false -> 0
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::String:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(FromStringWithDefault(std::string_view(x.AsStringRef()), TargetType(0))); // 0 is the default passed to FromStringWithDefault
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Entity:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(0)); // entities convert to zero
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::List:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(0)); // lists convert to zero regardless of content
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Dict:
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod(TargetType(0)); // dicts convert to zero regardless of content
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Attr:
+ return ConvertToFloat<Strict, AutoConvert, TargetType>(x.GetVariantItem().Release(), valueBuilder, pos); // unwrap the attributed node and convert its value
+ }
+ UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse floating point value from " << TDebugPrinter(x)).c_str()); // reached only through the `break` statements above
+ static_assert(std::is_floating_point<TargetType>(), "Expect float.");
+// Converts a DOM node to a string value.
+// Strict: a node that cannot be converted terminates the UDF (see the UdfTerminate call at the end)
+// instead of producing an empty value.
+// AutoConvert: numbers and booleans are printed, Entity/List/Dict become the empty string and,
+// when Utf8 is set, strings that are not valid UTF-8 are C-escaped.
+// Utf8: string nodes must pass IsUtf8; without it string nodes are returned unchanged.
+// Attr nodes are unwrapped recursively.
+template<bool Strict, bool AutoConvert, bool Utf8>
+TUnboxedValuePod ConvertToString(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::String:
+ if constexpr (Utf8)
+ if (IsUtf8(x.AsStringRef()))
+ return x;
+ else
+ // AutoConvert is a template flag, so it is tested at compile time like every other branch here.
+ if constexpr (AutoConvert)
+ return valueBuilder->NewString(EscapeC(TStringBuf(x.AsStringRef()))).Release();
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ else
+ return x;
+ case ENodeType::Uint64:
+ if constexpr (AutoConvert)
+ return valueBuilder->NewString(ToString(x.Get<ui64>())).Release();
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Int64:
+ if constexpr (AutoConvert)
+ return valueBuilder->NewString(ToString(x.Get<i64>())).Release();
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Bool:
+ if constexpr (AutoConvert)
+ return x.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false");
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Double:
+ if constexpr (AutoConvert)
+ return valueBuilder->NewString(::FloatToString(x.Get<double>())).Release();
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Entity:
+ case ENodeType::List:
+ case ENodeType::Dict:
+ // These three kinds share one rule: the empty string with AutoConvert, a failure otherwise.
+ if constexpr (AutoConvert)
+ return TUnboxedValuePod::Embedded("");
+ else if constexpr (Strict)
+ break;
+ else
+ return {};
+ case ENodeType::Attr:
+ return ConvertToString<Strict, AutoConvert, Utf8>(x.GetVariantItem().Release(), valueBuilder, pos);
+ }
+ // Reached only through the `break` statements of the Strict branches above.
+ UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse string value from " << TDebugPrinter(x)).c_str());
+// Boxed value that wraps a list or dict (Original) and applies Converter to its values on access:
+// list items and dict payloads are converted when they are read through an iterator or Lookup,
+// while lengths, emptiness checks and key lookups are forwarded to Original unchanged.
+class TLazyConveter : public TManagedBoxedValue {
+ using TConverter = std::function<TUnboxedValuePod(TUnboxedValuePod)>;
+ TLazyConveter(TUnboxedValue&& original, TConverter&& converter)
+ : Original(std::move(original)), Converter(std::move(converter))
+ {}
+ // Iterator adapter over an iterator of Original.
+ // NoSwap == false: Next() converts the produced value; NextPair() converts the key.
+ // NoSwap == true: Next() passes the value through; NextPair() converts the payload.
+ template <bool NoSwap>
+ class TIterator: public TManagedBoxedValue {
+ public:
+ TIterator(TUnboxedValue&& original, const TConverter& converter)
+ : Original(std::move(original)), Converter(converter)
+ {}
+ private:
+ bool Skip() final {
+ return Original.Skip();
+ }
+ bool Next(TUnboxedValue& value) final {
+ if (Original.Next(value)) {
+ if constexpr (!NoSwap) {
+ value = Converter(value.Release());
+ }
+ return true;
+ }
+ return false;
+ }
+ bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final {
+ if (Original.NextPair(key, payload)) {
+ if constexpr (NoSwap) {
+ payload = Converter(payload.Release());
+ } else {
+ key = Converter(key.Release());
+ }
+ return true;
+ }
+ return false;
+ }
+ const TUnboxedValue Original;
+ const TConverter Converter;
+ };
+ ui64 GetDictLength() const final {
+ return Original.GetDictLength();
+ }
+ ui64 GetListLength() const final {
+ return Original.GetListLength();
+ }
+ bool HasFastListLength() const final {
+ return Original.HasFastListLength();
+ }
+ bool HasDictItems() const final {
+ return Original.HasDictItems();
+ }
+ bool HasListItems() const final {
+ return Original.HasListItems();
+ }
+ // List items are converted as they are produced.
+ TUnboxedValue GetListIterator() const final {
+ return TUnboxedValuePod(new TIterator<false>(Original.GetListIterator(), Converter));
+ }
+ // Keys pass through, payloads are converted.
+ TUnboxedValue GetDictIterator() const final {
+ return TUnboxedValuePod(new TIterator<true>(Original.GetDictIterator(), Converter));
+ }
+ // Keys are never converted.
+ TUnboxedValue GetKeysIterator() const final {
+ return TUnboxedValuePod(new TIterator<true>(Original.GetKeysIterator(), Converter));
+ }
+ // Marked `final` like the sibling overrides, so a signature mismatch with the base class
+ // becomes a compile error instead of silently declaring a new function.
+ TUnboxedValue GetPayloadsIterator() const final {
+ return TUnboxedValuePod(new TIterator<false>(Original.GetPayloadsIterator(), Converter));
+ }
+ bool Contains(const TUnboxedValuePod& key) const final {
+ return Original.Contains(key);
+ }
+ // Looks the key up in Original and converts the found payload; an empty value means "not found".
+ TUnboxedValue Lookup(const TUnboxedValuePod& key) const final {
+ if (auto lookup = Original.Lookup(key)) {
+ return Converter(lookup.Release().GetOptionalValue()).MakeOptional();
+ }
+ return {};
+ }
+ bool IsSortedDict() const final {
+ return Original.IsSortedDict();
+ }
+ const TUnboxedValue Original;
+ const TConverter Converter;
diff --git a/yql/essentials/minikql/dom/hash.cpp b/yql/essentials/minikql/dom/hash.cpp
new file mode 100644
index 0000000000..ba4d8d0146
--- /dev/null
+++ b/yql/essentials/minikql/dom/hash.cpp
@@ -0,0 +1,151 @@
+#include "node.h"
+#include "hash.h"
+#include <yql/essentials/public/udf/udf_type_ops.h>
+namespace NYql::NDom {
+using namespace NUdf;
+namespace {
+// Folds the hashes of all list items, in order, into a single hash.
+// An unboxed list yields 0. Lists exposing their elements directly are walked by index,
+// all others through a list iterator.
+THashType HashList(const NUdf::TUnboxedValuePod x) {
+ THashType hash = 0ULL;
+ if (x.IsBoxed()) {
+ if (const auto elements = x.GetElements()) {
+ const auto size = x.GetListLength();
+ // 64-bit index: the list length is 64-bit, a 32-bit counter could wrap and never reach it.
+ for (ui64 i = 0ULL; i < size; ++i) {
+ hash = CombineHashes(hash, HashDom(elements[i]));
+ }
+ } else {
+ const auto it = x.GetListIterator();
+ for (TUnboxedValue v; it.Next(v); hash = CombineHashes(hash, HashDom(v)))
+ continue;
+ }
+ }
+ return hash;
+THashType HashDict(const NUdf::TUnboxedValuePod x) { // Folds every (key, payload) pair of the dict into one hash; an unboxed dict yields 0.
+ THashType hash = 0ULL;
+ if (x.IsBoxed()) {
+ const auto it = x.GetDictIterator();
+ for (TUnboxedValue k, v; it.NextPair(k, v);) { // pairs are folded in iteration order
+ hash = CombineHashes(hash, CombineHashes(GetStringHash(k), HashDom(v))); // keys are hashed as strings, payloads recursively as DOM nodes
+ }
+ }
+ return hash;
+// Item-wise equality of two DOM lists.
+// Two boxed lists are equal when they have the same number of items and every pair of items at the
+// same position is equal according to EquateDoms. If at least one list is unboxed, the lists are
+// equal only when both are unboxed.
+bool EquateLists(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) {
+ if (x.IsBoxed() && y.IsBoxed()) {
+ const auto ex = x.GetElements();
+ const auto ey = y.GetElements();
+ if (ex && ey) {
+ const auto size = x.GetListLength();
+ if (size != y.GetListLength()) {
+ return false;
+ }
+ // 64-bit index: the list length is 64-bit, a 32-bit counter could wrap and never reach it.
+ for (ui64 i = 0ULL; i < size; ++i) {
+ if (!EquateDoms(ex[i], ey[i]))
+ return false;
+ }
+ } else {
+ const auto itx = x.GetListIterator();
+ const auto ity = y.GetListIterator();
+ for (TUnboxedValue vx, vy; itx.Next(vx);) {
+ if (!ity.Next(vy))
+ return false;
+ if (!EquateDoms(vx, vy))
+ return false;
+ }
+ // x is exhausted here; if y still has an item, y is longer and the lists differ.
+ if (ity.Skip())
+ return false;
+ }
+ return true;
+ }
+ return x.IsBoxed() == y.IsBoxed();
+// Equality of two DOM dicts.
+// Boxed dicts must have the same length. When both expose a resource pointer it is treated as an
+// array of TPair and compared position by position; otherwise every pair of x is looked up in y.
+// If at least one dict is unboxed, the dicts are equal only when both are unboxed.
+bool EquateDicts(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) {
+ if (x.IsBoxed() && y.IsBoxed()) {
+ const auto size = x.GetDictLength();
+ if (size != y.GetDictLength()) {
+ return false;
+ }
+ const auto xr = static_cast<const TPair*>(x.GetResource());
+ const auto yr = static_cast<const TPair*>(y.GetResource());
+ // clone dict as attrnode
+ if (xr && yr) {
+ // 64-bit index: the dict length is 64-bit, a 32-bit counter could wrap and never reach it.
+ for (ui64 i = 0ULL; i < size; ++i) {
+ if (!EquateStrings(xr[i].first, yr[i].first))
+ return false;
+ if (!EquateDoms(xr[i].second, yr[i].second))
+ return false;
+ }
+ } else {
+ const auto it = x.GetDictIterator();
+ for (TUnboxedValue k, v; it.NextPair(k, v);) {
+ // A pair matches when y holds the same key with an equal payload.
+ if (auto l = y.Lookup(k))
+ if (EquateDoms(v, l.GetOptionalValue()))
+ continue;
+ return false;
+ }
+ }
+ return true;
+ }
+ return x.IsBoxed() == y.IsBoxed();
+THashType HashDom(const NUdf::TUnboxedValuePod x) { // Hashes a DOM node: the node type is always mixed into the hash of the node's content.
+ switch (const auto type = GetNodeType(x); type) {
+ case ENodeType::Double:
+ return CombineHashes(THashType(type), GetFloatHash<double>(x));
+ case ENodeType::Uint64:
+ return CombineHashes(THashType(type), GetIntegerHash<ui64>(x));
+ case ENodeType::Int64:
+ return CombineHashes(THashType(type), GetIntegerHash<i64>(x));
+ case ENodeType::Bool:
+ return CombineHashes(THashType(type), std::hash<bool>()(x.Get<bool>()));
+ case ENodeType::String:
+ return CombineHashes(THashType(type), GetStringHash(x));
+ case ENodeType::Entity:
+ return CombineHashes(THashType(type), THashType(~0ULL)); // entities carry no content, so a fixed all-ones constant is used
+ case ENodeType::List:
+ return CombineHashes(THashType(type), HashList(x));
+ case ENodeType::Dict:
+ return CombineHashes(THashType(type), HashDict(x));
+ case ENodeType::Attr:
+ return CombineHashes(THashType(type), CombineHashes(HashDict(x), HashDom(x.GetVariantItem().Release()))); // attributes are hashed as a dict, then combined with the hash of the wrapped value
+ } // NOTE(review): nothing is returned after the switch; presumably the cases cover every ENodeType value - confirm
+bool EquateDoms(const NUdf::TUnboxedValuePod x, const NUdf::TUnboxedValuePod y) { // Deep equality of two DOM nodes; nodes of different types are never equal.
+ if (const auto type = GetNodeType(x); type == GetNodeType(y)) { // only nodes of the same type are compared by content
+ switch (type) {
+ case ENodeType::Double:
+ return EquateFloats<double>(x, y);
+ case ENodeType::Uint64:
+ return EquateIntegers<ui64>(x, y);
+ case ENodeType::Int64:
+ return EquateIntegers<i64>(x, y);
+ case ENodeType::Bool:
+ return x.Get<bool>() == y.Get<bool>();
+ case ENodeType::String:
+ return EquateStrings(x, y);
+ case ENodeType::Entity:
+ return true; // two entities are always equal
+ case ENodeType::List:
+ return EquateLists(x, y);
+ case ENodeType::Dict:
+ return EquateDicts(x, y);
+ case ENodeType::Attr:
+ return EquateDicts(x, y) && EquateDoms(x.GetVariantItem().Release(), y.GetVariantItem().Release()); // both the attributes (compared as dicts) and the wrapped values must match
+ }
+ }
+ return false; // the node types differ
diff --git a/yql/essentials/minikql/dom/hash.h b/yql/essentials/minikql/dom/hash.h
new file mode 100644
index 0000000000..870435ff4e
--- /dev/null
+++ b/yql/essentials/minikql/dom/hash.h
@@ -0,0 +1,13 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_types.h>
+#include <yql/essentials/public/udf/udf_type_ops.h>
+namespace NYql::NDom {
+NUdf::THashType HashDom(const NUdf::TUnboxedValuePod value); // Computes the hash of a DOM node.
+bool EquateDoms(const NUdf::TUnboxedValuePod lhs, const NUdf::TUnboxedValuePod rhs); // Returns true when the two DOM nodes are equal.
diff --git a/yql/essentials/minikql/dom/json.cpp b/yql/essentials/minikql/dom/json.cpp
new file mode 100644
index 0000000000..a29d044adf
--- /dev/null
+++ b/yql/essentials/minikql/dom/json.cpp
@@ -0,0 +1,349 @@
+#include "json.h"
+#include "node.h"
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_writer.h>
+#include <util/stream/input.h>
+#include <util/stream/str.h>
+#include <util/generic/stack.h>
+#include <util/system/yassert.h>
+#include <util/system/compiler.h>
+#include <cmath>
+#include <ctype.h>
+namespace NYql::NDom {
+using namespace NUdf;
+using namespace NJson;
+namespace {
+size_t AsciiSize(const TStringBuf& str) { // Returns the length of the leading run of ASCII characters, i.e. the index of the first non-ASCII byte or str.size().
+ size_t s = 0U;
+ while (s < str.size() && isascii(str[s]))
+ ++s;
+ return s;
+TString EncodeUtf(const TStringBuf& str, size_t from) // Copies the first `from` bytes as is and writes every later non-ASCII byte as a two-byte sequence.
+ TString result(str.substr(0, from)); // the caller passes the length of the ASCII-only prefix (see AsciiSize)
+ while (from < str.size()) {
+ const auto c = str[from++];
+ if (isascii(c)) {
+ result.append(c);
+ } else {
+ result.append((c >> '\x06') & '\x03' | '\xC0'); // lead byte: 0xC0 plus the top two bits of the source byte
+ result.append(c & '\x3F' | '\x80'); // continuation byte: 0x80 plus the low six bits of the source byte
+ }
+ }
+ return result;
+// Reverses EncodeUtf: bytes before `from` are kept as is, and every two-byte sequence with a lead
+// byte 0xC0..0xC3 found after that is folded back into a single byte.
+// Throws yexception for lead bytes outside that range (code points above 255) and for an input
+// that ends in the middle of a two-byte sequence.
+TString DecodeUtf(const TStringBuf& str, size_t from)
+ TString result(str);
+ // The result is rewritten in place; it never grows, so `i` trails `from`.
+ auto i = from;
+ while (from < str.size()) {
+ const auto c = str[from++];
+ if (isascii(c)) {
+ result[i++] = c;
+ } else if ((c & '\xFC') == '\xC0') {
+ // The continuation byte must exist: a truncated input must not be read past its end.
+ if (from >= str.size()) {
+ ythrow yexception() << "Unexpected end of string inside a two-byte UTF-8 sequence.";
+ }
+ result[i++] = ((c & '\x03') << '\x06') | (str[from++] & '\x3F');
+ } else {
+ ythrow yexception() << "Unicode symbols with codes greater than 255 are not supported.";
+ }
+ }
+ result.resize(i);
+ return result;
+template<bool DecodeUtf8> // TDomCallbacks: JSON reader callbacks that build a DOM value; DecodeUtf8 makes non-ASCII strings go through DecodeUtf.
+class TDomCallbacks : public TJsonCallbacks {
+ TDomCallbacks(const IValueBuilder* valueBuilder, bool throwException)
+ : TJsonCallbacks(throwException)
+ , ValueBuilder(valueBuilder)
+ {
+ Result.push({}); // bottom stack level: receives the top-level value
+ }
+ bool OnNull() override {
+ return PushToCurrentCollection(MakeEntity());
+ }
+ bool OnBoolean(bool value) override {
+ return PushToCurrentCollection(MakeBool(value));
+ }
+ bool OnInteger(long long value) override {
+ return PushToCurrentCollection(MakeInt64(static_cast<i64>(value)));
+ }
+ bool OnUInteger(unsigned long long value) override {
+ return PushToCurrentCollection(MakeUint64(static_cast<ui64>(value)));
+ }
+ bool OnDouble(double value) override {
+ if (Y_UNLIKELY(std::isinf(value))) { // infinite numbers are rejected with an exception
+ ythrow yexception() << "JSON number is infinite";
+ }
+ return PushToCurrentCollection(MakeDouble(value));
+ }
+ bool OnString(const TStringBuf& value) override {
+ if constexpr (DecodeUtf8) {
+ if (const auto from = AsciiSize(value); from < value.size()) { // decode only when a non-ASCII byte is present
+ return PushToCurrentCollection(MakeString(DecodeUtf(value, from), ValueBuilder));
+ }
+ }
+ return PushToCurrentCollection(MakeString(value, ValueBuilder));
+ }
+ bool OnOpenMap() override {
+ return OnCollectionOpen();
+ }
+ bool OnMapKey(const TStringBuf& value) override {
+ return OnString(value); // keys are stored like string values, so a map level holds key, value, key, value, ...
+ }
+ bool OnCloseMap() override {
+ Y_DEBUG_ABORT_UNLESS(!Result.empty());
+ auto& items = Result.top();
+ Y_DEBUG_ABORT_UNLESS(items.size() % 2 == 0);
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs;
+ for (size_t i = 0; i < items.size(); i += 2) { // regroup the flat sequence into (key, value) pairs
+ pairs.emplace_back(std::move(items[i]), std::move(items[i + 1]));
+ }
+ Result.pop(); // drop the finished map level; the dict goes to the enclosing level
+ return PushToCurrentCollection(MakeDict(pairs.data(), pairs.size()));
+ }
+ bool OnOpenArray() override {
+ return OnCollectionOpen();
+ }
+ bool OnCloseArray() override {
+ Y_DEBUG_ABORT_UNLESS(!Result.empty());
+ auto& items = Result.top();
+ TUnboxedValue list = MakeList(items.data(), items.size(), ValueBuilder); // the list is built before the level holding its items is popped
+ Result.pop();
+ return PushToCurrentCollection(std::move(list));
+ }
+ bool OnEnd() override {
+ return IsResultSingle(); // parsing succeeds only if exactly one top-level value was produced
+ }
+ TUnboxedValue GetResult() && {
+ Y_DEBUG_ABORT_UNLESS(IsResultSingle());
+ return std::move(Result.top()[0]);
+ }
+ bool OnCollectionOpen() {
+ Result.emplace(); // new stack level for the items of the opened array or map
+ return true;
+ }
+ bool PushToCurrentCollection(TUnboxedValue&& value) {
+ Y_DEBUG_ABORT_UNLESS(!Result.empty());
+ Result.top().emplace_back(std::move(value));
+ return true;
+ }
+ bool IsResultSingle() {
+ return Result.size() == 1 && Result.top().size() == 1; // one stack level holding one value
+ }
+ const IValueBuilder* ValueBuilder;
+ using TUnboxedValues = TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>>;
+ std::stack<TUnboxedValues, TSmallVec<TUnboxedValues, TStdAllocatorForUdf<TUnboxedValues>>> Result; // one vector of collected values per currently open collection
+class TTestCallbacks : public TJsonCallbacks { // Validation-only callbacks: build nothing, accept every event except an infinite number and a second OnEnd.
+ TTestCallbacks()
+ : TJsonCallbacks(false) // the base class is constructed with throwException == false
+ {}
+ bool OnNull() final { return true; }
+ bool OnBoolean(bool) final { return true; }
+ bool OnInteger(long long) final { return true; }
+ bool OnUInteger(unsigned long long) final { return true; }
+ bool OnDouble(double value) final { return !std::isinf(value); } // infinite numbers make the input invalid
+ bool OnString(const TStringBuf&) final { return true; }
+ bool OnOpenMap() final { return true; }
+ bool OnMapKey(const TStringBuf&) final { return true; }
+ bool OnCloseMap() final { return true; }
+ bool OnOpenArray() final { return true; }
+ bool OnCloseArray() final { return true; }
+ bool OnEnd() final {
+ if (HasResult) // a second OnEnd call is reported as a failure
+ return false;
+ return HasResult = true;
+ }
+ private:
+ bool HasResult = false; // set by the first OnEnd call
+bool IsEntity(const TUnboxedValuePod value) { // True for an Entity node, including one wrapped in any number of Attr nodes.
+ switch (GetNodeType(value)) {
+ case ENodeType::Entity: return true;
+ case ENodeType::Attr: return IsEntity(value.GetVariantItem().Release()); // look through the attribute wrapper
+ default: return false;
+ }
+template<bool SkipMapEntity, bool EncodeUtf8> // forward declaration: WriteArray and WriteMap call WriteValue before its definition
+void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer);
+template<bool SkipMapEntity, bool EncodeUtf8> // WriteArray: writes a DOM list as a JSON array; an unboxed list produces an empty array.
+void WriteArray(const TUnboxedValuePod value, TJsonWriter& writer) {
+ writer.OpenArray();
+ if (value.IsBoxed()) {
+ if (const auto elements = value.GetElements()) { // direct element access when the list provides it
+ const auto size = value.GetListLength();
+ for (ui64 i = 0; i < size; ++i) {
+ WriteValue<SkipMapEntity, EncodeUtf8>(elements[i], writer);
+ }
+ } else { // otherwise walk the list through its iterator
+ const auto it = value.GetListIterator();
+ for (TUnboxedValue v; it.Next(v); WriteValue<SkipMapEntity, EncodeUtf8>(v, writer))
+ continue;
+ }
+ }
+ writer.CloseArray();
+template<bool SkipMapEntity, bool EncodeUtf8> // WriteMap: writes a DOM dict as a JSON map; an unboxed dict produces an empty map.
+void WriteMap(const TUnboxedValuePod value, TJsonWriter& writer) {
+ writer.OpenMap();
+ if (value.IsBoxed()) {
+ TUnboxedValue key, payload;
+ for (const auto it = value.GetDictIterator(); it.NextPair(key, payload);) {
+ if constexpr (SkipMapEntity)
+ if (IsEntity(payload)) // with SkipMapEntity, entries whose payload is an entity are omitted entirely
+ continue;
+ const TStringBuf str = key.AsStringRef();
+ if constexpr (EncodeUtf8)
+ if (const auto from = AsciiSize(str); from < str.size()) // the key contains a non-ASCII byte: write the encoded form
+ writer.WriteKey(EncodeUtf(str, from));
+ else
+ writer.WriteKey(str);
+ else
+ writer.WriteKey(str);
+ WriteValue<SkipMapEntity, EncodeUtf8>(payload, writer);
+ }
+ }
+ writer.CloseMap();
+template<bool SkipMapEntity, bool EncodeUtf8> // WriteValue: writes one DOM node to the JSON writer, dispatching on the node type.
+void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer) {
+ switch (GetNodeType(value)) {
+ case ENodeType::String: {
+ const TStringBuf str = value.AsStringRef();
+ if constexpr (EncodeUtf8) {
+ if (const auto from = AsciiSize(str); from < str.size()) { // the string contains a non-ASCII byte: write the encoded form
+ return writer.Write(EncodeUtf(str, from));
+ }
+ }
+ return writer.Write(str);
+ }
+ case ENodeType::Bool:
+ return writer.Write(value.Get<bool>());
+ case ENodeType::Int64:
+ return writer.Write(value.Get<i64>());
+ case ENodeType::Uint64:
+ return writer.Write(value.Get<ui64>());
+ case ENodeType::Double:
+ return writer.Write(value.Get<double>());
+ case ENodeType::Entity:
+ return writer.WriteNull(); // entities are written as JSON null
+ case ENodeType::List:
+ return WriteArray<SkipMapEntity, EncodeUtf8>(value, writer);
+ case ENodeType::Dict:
+ return WriteMap<SkipMapEntity, EncodeUtf8>(value, writer);
+ case ENodeType::Attr: // an attributed node becomes a map with the two keys "$attributes" and "$value"
+ writer.OpenMap();
+ writer.WriteKey("$attributes");
+ WriteMap<SkipMapEntity, EncodeUtf8>(value, writer);
+ writer.WriteKey("$value");
+ WriteValue<SkipMapEntity, EncodeUtf8>(value.GetVariantItem().Release(), writer);
+ writer.CloseMap();
+ }
+bool IsValidJson(const TStringBuf json) { // Checks the text by running the JSON reader with TTestCallbacks; no DOM is built.
+ TMemoryInput input(json.data(), json.size());
+ TTestCallbacks callbacks;
+ return ReadJson(&input, &callbacks);
+// Parses JSON text into a DOM value.
+// json: the JSON text; valueBuilder: passed to the callbacks that create the DOM values;
+// decodeUtf8: when true, strings containing non-ASCII bytes are passed through DecodeUtf.
+// The callbacks are created with throwException == true, so a `false` result from ReadJson is
+// treated as an internal error and terminates the UDF.
+TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueBuilder, bool decodeUtf8) {
+ TMemoryInput input(json.data(), json.size());
+ // The flag is a template argument of the callbacks, hence one branch per value.
+ if (decodeUtf8) {
+ TDomCallbacks<true> callbacks(valueBuilder, /* throwException */ true);
+ if (!ReadJson(&input, &callbacks)) {
+ UdfTerminate("Internal error: parser error occurred but corresponding callback was not called");
+ }
+ return std::move(callbacks).GetResult();
+ } else {
+ TDomCallbacks<false> callbacks(valueBuilder, /* throwException */ true);
+ if (!ReadJson(&input, &callbacks)) {
+ UdfTerminate("Internal error: parser error occurred but corresponding callback was not called");
+ }
+ return std::move(callbacks).GetResult();
+ }
+TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8, bool writeNanAsString) { // Serializes a DOM node to JSON text and returns it as a string.
+ TStringStream output;
+ TJsonWriterConfig config;
+ config.SetFormatOutput(false);
+ config.WriteNanAsString = writeNanAsString;
+ config.FloatToStringMode = EFloatToStringMode::PREC_AUTO;
+ TJsonWriter writer(&output, config);
+ if (skipMapEntity) // the two run-time flags select one of the four WriteValue instantiations
+ if (encodeUtf8)
+ WriteValue<true, true>(dom, writer);
+ else
+ WriteValue<true, false>(dom, writer);
+ else
+ if (encodeUtf8)
+ WriteValue<false, true>(dom, writer);
+ else
+ WriteValue<false, false>(dom, writer);
+ writer.Flush(); // flush the writer before the stream content is read
+ return output.Str();
diff --git a/yql/essentials/minikql/dom/json.h b/yql/essentials/minikql/dom/json.h
new file mode 100644
index 0000000000..ea95807de8
--- /dev/null
+++ b/yql/essentials/minikql/dom/json.h
@@ -0,0 +1,14 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+namespace NYql::NDom {
+bool IsValidJson(const TStringBuf json); // Returns true when the text is accepted by the JSON reader.
+NUdf::TUnboxedValue TryParseJsonDom(const TStringBuf json, const NUdf::IValueBuilder* valueBuilder, bool decodeUtf8 = false); // Parses JSON text into a DOM value.
+TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity = false, bool encodeUtf8 = false, bool writeNanAsString = false); // Serializes a DOM value to JSON text.
diff --git a/yql/essentials/minikql/dom/make.cpp b/yql/essentials/minikql/dom/make.cpp
new file mode 100644
index 0000000000..ca6864f759
--- /dev/null
+++ b/yql/essentials/minikql/dom/make.cpp
@@ -0,0 +1,170 @@
+#include "make.h"
+#include "node.h"
+#include "yson.h"
+#include "json.h"
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <util/string/builder.h>
+namespace NYql::NDom {
+using namespace NUdf;
+namespace {
+TUnboxedValuePod MakeData(const TDataTypeId nodeType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a value of a primitive data type into a DOM node; aborts on an unsupported type id.
+ switch (nodeType) {
+ case TDataType<char*>::Id: return value; // string values are returned unchanged
+ case TDataType<TUtf8>::Id: return value;
+ case TDataType<bool>::Id: return SetNodeType<ENodeType::Bool>(value);
+ case TDataType<i8>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i8>()))); // narrower signed integers are widened to i64
+ case TDataType<i16>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i16>())));
+ case TDataType<i32>::Id: return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(i64(value.Get<i32>())));
+ case TDataType<i64>::Id: return SetNodeType<ENodeType::Int64>(value);
+ case TDataType<ui8>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui8>()))); // narrower unsigned integers are widened to ui64
+ case TDataType<ui16>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui16>())));
+ case TDataType<ui32>::Id: return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(ui64(value.Get<ui32>())));
+ case TDataType<ui64>::Id: return SetNodeType<ENodeType::Uint64>(value);
+ case TDataType<float>::Id: return SetNodeType<ENodeType::Double>(TUnboxedValuePod(double(value.Get<float>()))); // float is widened to double
+ case TDataType<double>::Id: return SetNodeType<ENodeType::Double>(value);
+ case TDataType<TYson>::Id: return TryParseYsonDom(value.AsStringRef(), valueBuilder).Release(); // Yson and Json text is parsed into a DOM tree
+ case TDataType<TJson>::Id: return TryParseJsonDom(value.AsStringRef(), valueBuilder).Release();
+ default: break;
+ }
+ Y_ABORT("Unsupported data type.");
+TUnboxedValuePod MakeList(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a typed list into a DOM list node, converting every item with MakeDom.
+ if (const auto elements = value.GetElements()) { // direct element access: fill a new array of the same size
+ if (const auto size = value.GetListLength()) {
+ TUnboxedValue* items = nullptr;
+ auto res = valueBuilder->NewArray(size, items);
+ for (ui64 i = 0ULL; i < size; ++i) {
+ *items++ = MakeDom(typeHelper, itemType, elements[i], valueBuilder);
+ }
+ return SetNodeType<ENodeType::List>(res.Release());
+ }
+ } else { // otherwise collect the converted items through the list iterator
+ TSmallVec<TUnboxedValue> items;
+ if (value.HasFastListLength()) { // reserve up front only when the list reports a fast length
+ items.reserve(value.GetListLength());
+ }
+ const auto iterator = value.GetListIterator();
+ for (TUnboxedValue current; iterator.Next(current);) {
+ items.emplace_back(MakeDom(typeHelper, itemType, current, valueBuilder));
+ }
+ if (!items.empty()) {
+ auto res = valueBuilder->NewList(items.data(), items.size());
+ return SetNodeType<ENodeType::List>(res.Release());
+ }
+ }
+ return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void()); // an empty list is the List-tagged Void value
+TUnboxedValuePod MakeDict(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a typed dict into a DOM dict node: keys are kept, payloads are converted with MakeDom.
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items;
+ items.reserve(value.GetDictLength());
+ const auto it = value.GetDictIterator();
+ for (TUnboxedValue x, y; it.NextPair(x, y);) { // x is the key, y the payload
+ items.emplace_back(x, MakeDom(typeHelper, itemType, y, valueBuilder));
+ }
+ if (items.empty()) {
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void()); // an empty dict is the Dict-tagged Void value
+ }
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod(new TMapNode(items.data(), items.size())));
+TUnboxedValuePod MakeTuple(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a tuple into a DOM list node, converting each element according to its own type.
+ if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); const auto size = tupleTypeInspector.GetElementsCount()) { // taken only when the tuple has at least one element
+ TUnboxedValue* items = nullptr;
+ auto res = valueBuilder->NewArray(size, items);
+ for (ui64 i = 0ULL; i < size; ++i) {
+ *items++ = MakeDom(typeHelper, tupleTypeInspector.GetElementType(i), static_cast<const TUnboxedValuePod&>(value.GetElement(i)), valueBuilder);
+ }
+ return SetNodeType<ENodeType::List>(res.Release());
+ }
+ return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void()); // a tuple without elements becomes an empty list
+TUnboxedValuePod MakeStruct(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a struct into a DOM dict node keyed by member name.
+ if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); const auto size = structTypeInspector.GetMembersCount()) { // taken only when the struct has at least one member
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items;
+ items.reserve(size);
+ for (ui64 i = 0ULL; i < size; ++i) {
+ items.emplace_back(
+ valueBuilder->NewString(structTypeInspector.GetMemberName(i)), // key: the member name
+ MakeDom(typeHelper, structTypeInspector.GetMemberType(i), static_cast<const TUnboxedValuePod&>(value.GetElement(i)), valueBuilder) // payload: the member value converted according to its type
+ );
+ }
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod(new TMapNode(items.data(), items.size())));
+ }
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void()); // a struct without members becomes an empty dict
+TUnboxedValuePod MakeVariant(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts the active alternative of a variant into a DOM node; aborts when its type cannot be determined.
+ const auto index = value.GetVariantIndex();
+ const auto& item = value.GetVariantItem();
+ const auto underlyingType = TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType();
+ switch (const auto kind = typeHelper->GetTypeKind(underlyingType)) { // the underlying type is a tuple or a struct; the variant index selects the element/member type
+ case ETypeKind::Tuple:
+ if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, underlyingType); index < tupleTypeInspector.GetElementsCount())
+ return MakeDom(typeHelper, tupleTypeInspector.GetElementType(index), item, valueBuilder);
+ break;
+ case ETypeKind::Struct:
+ if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, underlyingType); index < structTypeInspector.GetMembersCount())
+ return MakeDom(typeHelper, structTypeInspector.GetMemberType(index), item, valueBuilder);
+ break;
+ default:
+ break;
+ }
+ Y_ABORT("Unsupported underlying type."); // also reached when the index is out of range for the tuple or struct
+TUnboxedValuePod MakeDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder) { // Converts a value of type `shape` into a DOM node, dispatching on the type kind; aborts on unsupported kinds.
+ switch (const auto kind = typeHelper->GetTypeKind(shape)) {
+ case ETypeKind::Null:
+ return MakeEntity();
+ case ETypeKind::EmptyList:
+ return SetNodeType<ENodeType::List>(TUnboxedValuePod::Void());
+ case ETypeKind::EmptyDict:
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Void());
+ case ETypeKind::Data:
+ return MakeData(TDataTypeInspector(*typeHelper, shape).GetTypeId(), value, valueBuilder);
+ case ETypeKind::Optional:
+ return value ? MakeDom(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType(), value.GetOptionalValue(), valueBuilder) : MakeEntity(); // an empty optional becomes an entity
+ case ETypeKind::List:
+ return MakeList(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder);
+ case ETypeKind::Dict: {
+ const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape);
+ const auto keyType = dictTypeInspector.GetKeyType();
+ Y_ABORT_UNLESS(ETypeKind::Data == typeHelper->GetTypeKind(keyType), "Unsupported dict key type kind."); // only data-typed keys are accepted ...
+ const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId();
+ Y_ABORT_UNLESS(keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id, "Unsupported dict key data type."); // ... and of those only the two string types
+ return MakeDict(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder);
+ }
+ case ETypeKind::Tuple:
+ return MakeTuple(typeHelper, shape, value, valueBuilder);
+ case ETypeKind::Struct:
+ return MakeStruct(typeHelper, shape, value, valueBuilder);
+ case ETypeKind::Variant:
+ return MakeVariant(typeHelper, shape, value, valueBuilder);
+ case ETypeKind::Resource:
+ if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) // a resource tagged NodeResourceName is returned unchanged
+ return value;
+ [[fallthrough]]; // any other resource is unsupported
+ default:
+ Y_ABORT("Unsupported data kind: %s", ToCString(kind));
+ }
diff --git a/yql/essentials/minikql/dom/make.h b/yql/essentials/minikql/dom/make.h
new file mode 100644
index 0000000000..48aab89474
--- /dev/null
+++ b/yql/essentials/minikql/dom/make.h
@@ -0,0 +1,10 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_types.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+namespace NYql::NDom {
+// Converts `value`, whose type is described by `shape`, into a DOM node.
+NUdf::TUnboxedValuePod MakeDom(const NUdf::ITypeInfoHelper* typeHelper, const NUdf::TType* shape, const NUdf::TUnboxedValuePod value, const NUdf::IValueBuilder* valueBuilder);
diff --git a/yql/essentials/minikql/dom/node.cpp b/yql/essentials/minikql/dom/node.cpp
new file mode 100644
index 0000000000..6eabde3e4e
--- /dev/null
+++ b/yql/essentials/minikql/dom/node.cpp
@@ -0,0 +1,202 @@
+#include "node.h"
+#include <util/generic/algorithm.h>
+namespace NYql::NDom {
+namespace {
+// Strict ordering of two entries by the string content of their keys.
+inline bool StringLess(const TPair& x, const TPair& y) {
+ const auto lhsKey = x.first.AsStringRef();
+ const auto rhsKey = y.first.AsStringRef();
+ return lhsKey < rhsKey;
+// Compares the key of an entry against a bare string reference (used for lower-bound search).
+inline bool StringRefLess(const TPair& x, const TStringRef& y) {
+ const auto entryKey = x.first.AsStringRef();
+ return entryKey < y;
+// True when both entries have keys with the same string content.
+inline bool StringEquals(const TPair& x, const TPair& y) {
+ const auto lhsKey = x.first.AsStringRef();
+ const auto rhsKey = y.first.AsStringRef();
+ return lhsKey == rhsKey;
+// Creates an iterator over the entries of `parent`.
+// Index is unsigned, so the initial -1 wraps around; the first Skip() computes Index + 1 == 0
+// and therefore moves to the first entry (or stops at once when the map has no entries).
+template <bool NoSwap>
+TMapNode::TIterator<NoSwap>::TIterator(const TMapNode* parent)
+ : Parent(const_cast<TMapNode*>(parent))
+ , Index(-1)
+// Advances to the next entry; returns false (without moving) when the current entry is the last one.
+template <bool NoSwap>
+bool TMapNode::TIterator<NoSwap>::Skip() {
+ const bool exhausted = Index + 1 == Parent->UniqueCount_;
+ if (!exhausted) {
+ ++Index;
+ }
+ return !exhausted;
+// Advances and stores the primary component of the new entry into `key`:
+// the entry's first member when NoSwap is true, its second member otherwise.
+template <bool NoSwap>
+bool TMapNode::TIterator<NoSwap>::Next(TUnboxedValue& key) {
+ if (Skip()) {
+ const auto& entry = Parent->Items_[Index];
+ if constexpr (NoSwap)
+ key = entry.first;
+ else
+ key = entry.second;
+ return true;
+ }
+ return false;
+// Advances and stores both components of the new entry; `key` is filled by Next(),
+// `payload` receives the other member of the entry (second when NoSwap, first otherwise).
+template <bool NoSwap>
+bool TMapNode::TIterator<NoSwap>::NextPair(TUnboxedValue& key, TUnboxedValue& payload) {
+ if (Next(key)) {
+ const auto& entry = Parent->Items_[Index];
+ if constexpr (NoSwap)
+ payload = entry.second;
+ else
+ payload = entry.first;
+ return true;
+ }
+ return false;
+// Move constructor: takes over the entry array of `src` and leaves `src` with
+// zero counts and a null array pointer.
+TMapNode::TMapNode(TMapNode&& src)
+ : Count_(src.Count_), UniqueCount_(src.UniqueCount_), Items_(src.Items_)
+ src.Count_ = src.UniqueCount_ = 0U;
+ src.Items_ = nullptr;
+// Builds a map from `count` key/payload entries.
+// The entries are copied into a freshly allocated array, stably sorted by key string and
+// de-duplicated by key; UniqueCount_ is the number of entries kept after de-duplication.
+TMapNode::TMapNode(const TPair* items, ui32 count)
+ : Count_(count)
+ , Items_((TPair*)UdfAllocateWithSize(sizeof(TPair) * count))
+ // The raw storage is zero-filled before the entries are assigned into it.
+ std::memset(Items_, 0, sizeof(TPair) * count);
+ for (ui32 i = 0; i < count; ++i) {
+ // NOTE(review): `items` points to const entries, so std::move here cannot actually move
+ // and this is a copy assignment.
+ Items_[i] = std::move(items[i]);
+ }
+ StableSort(Items_, Items_ + count, StringLess);
+ UniqueCount_ = Unique(Items_, Items_ + count, StringEquals) - Items_;
+ // Clear the leftover slots past the unique range; the destructor only clears the unique range.
+ for (ui32 i = UniqueCount_; i < count; ++i) {
+ Items_[i].first.Clear();
+ Items_[i].second.Clear();
+ }
+// Clears the stored entries and releases the array.
+// The deallocation size uses Count_ (the allocated slot count), not UniqueCount_.
+TMapNode::~TMapNode() {
+ for (ui32 i = 0; i < UniqueCount_; ++i) {
+ Items_[i].first.Clear();
+ Items_[i].second.Clear();
+ }
+ UdfFreeWithSize(Items_, sizeof(TPair) * Count_);
+// Number of entries with distinct keys.
+ui64 TMapNode::GetDictLength() const {
+ return UniqueCount_;
+// Returns a new iterator whose NextPair() yields (key, payload) in stored order.
+TUnboxedValue TMapNode::GetDictIterator() const {
+ return TUnboxedValuePod(new TIterator<true>(this));
+// Returns a new iterator whose Next() yields the keys (first member of each entry).
+TUnboxedValue TMapNode::GetKeysIterator() const {
+ return TUnboxedValuePod(new TIterator<true>(this));
+// Returns a new iterator whose Next() yields the payloads (second member of each entry).
+TUnboxedValue TMapNode::GetPayloadsIterator() const {
+ return TUnboxedValuePod(new TIterator<false>(this));
+// Binary search for `key` among the stored entries, comparing key strings.
+// The probe is a pair of `key` and an empty payload, because StringLess compares whole entries.
+bool TMapNode::Contains(const TUnboxedValuePod& key) const {
+ return BinarySearch(Items_, Items_ + UniqueCount_, std::make_pair(key, TUnboxedValuePod()), StringLess);
+// Looks up by the string content of `key`; delegates to the TStringRef overload.
+TUnboxedValue TMapNode::Lookup(const TUnboxedValuePod& key) const {
+ return Lookup(key.AsStringRef());
+// Finds the payload stored under `key` with a lower-bound search over the sorted entries.
+// Returns an empty value when no entry has exactly this key.
+TUnboxedValue TMapNode::Lookup(const TStringRef& key) const {
+ const auto end = Items_ + UniqueCount_;
+ const auto pos = LowerBound(Items_, end, key, StringRefLess);
+ if (pos != end && static_cast<TStringBuf>(pos->first.AsStringRef()) == static_cast<TStringBuf>(key))
+ return pos->second;
+ return {};
+// True when the map holds at least one entry.
+bool TMapNode::HasDictItems() const {
+ return UniqueCount_ > 0ULL;
+// Always true: the constructor keeps the entries sorted by key string.
+bool TMapNode::IsSortedDict() const {
+ return true;
+// Exposes the raw pointer to the entry array.
+void* TMapNode::GetResource() {
+ return Items_;
+// Builds an attribute node around `value`, taking the entries from the TMapNode boxed in `map`.
+// The boxed map is moved from even though `map` is passed as a const reference, so per the
+// TMapNode move constructor it is left with zero counts and a null entry array.
+TAttrNode::TAttrNode(const TUnboxedValue& map, TUnboxedValue&& value)
+ : TMapNode(std::move(*static_cast<TMapNode*>(map.AsBoxed().Get()))), Value_(std::move(value))
+// Builds an attribute node around `value` with attributes taken from `count` entries in `items`.
+TAttrNode::TAttrNode(TUnboxedValue&& value, const TPair* items, ui32 count)
+ : TMapNode(items, count), Value_(std::move(value))
+// Returns the value wrapped by this attribute node.
+TUnboxedValue TAttrNode::GetVariantItem() const {
+ return Value_;
+// Binds the printer to `node`. Node is a reference member, so `node` must stay alive
+// for as long as the printer is used.
+TDebugPrinter::TDebugPrinter(const TUnboxedValuePod& node)
+ : Node(node)
+// Writes a short human-readable description of the node to `o` and returns `o`.
+// Scalars are printed with their value; strings are printed verbatim only when they are
+// embedded and consist solely of printable characters, otherwise only their size is shown.
+// Lists and dicts report their length, and attribute nodes describe the value they wrap.
+IOutputStream& TDebugPrinter::Out(IOutputStream &o) const {
+ switch (GetNodeType(Node)) {
+ case ENodeType::Entity:
+ o << "entity (#)";
+ break;
+ case ENodeType::Bool:
+ o << "boolean (" << (Node.Get<bool>() ? "true" : "false") << ") value";
+ break;
+ case ENodeType::Int64:
+ o << "integer (" << Node.Get<i64>() << ") value";
+ break;
+ case ENodeType::Uint64:
+ o << "unsigned integer (" << Node.Get<ui64>() << ") value";
+ break;
+ case ENodeType::Double:
+ o << "floating point (" << Node.Get<double>() << ") value";
+ break;
+ case ENodeType::String:
+ if (const std::string_view str(Node.AsStringRef()); str.empty())
+ o << "empty string";
+ // std::isprint has undefined behaviour for negative arguments, so each byte is converted
+ // to unsigned char first; this matters for any non-ASCII byte when char is signed.
+ else if(Node.IsEmbedded() && str.cend() == std::find_if(str.cbegin(), str.cend(), [](char c){ return !std::isprint(static_cast<unsigned char>(c)); }))
+ o << "string '" << str << "' value";
+ else
+ o << "string value of size " << str.size();
+ break;
+ case ENodeType::List:
+ // An unboxed list node carries no items.
+ if (Node.IsBoxed())
+ o << "list of size " << Node.GetListLength();
+ else
+ o << "empty list";
+ break;
+ case ENodeType::Dict:
+ // An unboxed dict node carries no items.
+ if (Node.IsBoxed())
+ o << "dict of size " << Node.GetDictLength();
+ else
+ o << "empty dict";
+ break;
+ case ENodeType::Attr:
+ // Attributes are ignored; describe the wrapped value instead.
+ return TDebugPrinter(Node.GetVariantItem()).Out(o);
+ default:
+ o << "invalid node";
+ break;
+ }
+ return o;
diff --git a/yql/essentials/minikql/dom/node.h b/yql/essentials/minikql/dom/node.h
new file mode 100644
index 0000000000..04a211fd09
--- /dev/null
+++ b/yql/essentials/minikql/dom/node.h
@@ -0,0 +1,167 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+namespace NYql::NDom {
+using namespace NUdf;
+// Resource tag that identifies DOM nodes; resource type tags are compared against it.
+constexpr char NodeResourceName[] = "Yson2.Node";
+// Key/payload entry of a map-like node.
+using TPair = std::pair<TUnboxedValue, TUnboxedValue>;
+// Kind of a DOM node. The numeric value is what SetNodeType stores, shifted by NodeTypeShift,
+// in the tag byte of the value, so every enumerator has to fit into four bits.
+enum class ENodeType : ui8 {
+ String = 0,
+ Bool = 1,
+ Int64 = 2,
+ Uint64 = 3,
+ Double = 4,
+ Entity = 5,
+ List = 6,
+ Dict = 7,
+ Attr = 8,
+// The node type occupies the upper four bits of the tag byte:
+// shift to position the enumerator, mask to extract it.
+constexpr ui8 NodeTypeShift = 4;
+constexpr ui8 NodeTypeMask = 0xf0;
+// Returns a copy of `node` tagged with `type`.
+// The byte at offset TUnboxedValuePod::InternalBufferSize is overwritten as a whole,
+// so its lower four bits become zero.
+template<ENodeType type>
+constexpr inline TUnboxedValuePod SetNodeType(TUnboxedValuePod node) {
+ const auto buffer = reinterpret_cast<ui8*>(&node);
+ buffer[TUnboxedValuePod::InternalBufferSize] = ui8(type) << NodeTypeShift;
+ return node;
+// Compile-time-typed check: true when the upper four bits of the tag byte of `node` encode `type`.
+template<ENodeType type>
+constexpr inline bool IsNodeType(const TUnboxedValuePod node) {
+ const auto buffer = reinterpret_cast<const ui8*>(&node);
+ const auto currentMask = buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask;
+ constexpr ui8 expectedMask = static_cast<ui8>(type) << NodeTypeShift;
+ return currentMask == expectedMask;
+// Decodes the node type stored in the upper four bits of the tag byte of `node`.
+inline ENodeType GetNodeType(const TUnboxedValuePod& node) {
+ const auto* bytes = reinterpret_cast<const ui8*>(&node);
+ const ui8 tagByte = bytes[TUnboxedValuePod::InternalBufferSize];
+ const ui8 typeBits = (tagByte & NodeTypeMask) >> NodeTypeShift;
+ return static_cast<ENodeType>(typeBits);
+// Runtime-typed check: true when the upper four bits of the tag byte of `node` encode `type`.
+inline bool IsNodeType(const TUnboxedValuePod& node, ENodeType type) {
+ const auto* bytes = reinterpret_cast<const ui8*>(&node);
+ const ui8 wanted = static_cast<ui8>(type) << NodeTypeShift;
+ const ui8 actual = bytes[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask;
+ return actual == wanted;
+// Boxed dictionary node: an array of key/payload entries kept sorted by key string,
+// with duplicate keys removed at construction time.
+class TMapNode : public TManagedBoxedValue {
+ // Iterator over the entries of a map. NoSwap selects which member of an entry is the
+ // primary one returned by Next(): the first member when true, the second when false.
+ template <bool NoSwap>
+ class TIterator: public TManagedBoxedValue {
+ public:
+ TIterator(const TMapNode* parent);
+ private:
+ bool Skip() final;
+ bool Next(TUnboxedValue& key) final;
+ bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final;
+ // Map being iterated, held through a ref-counted pointer.
+ const TRefCountedPtr<TMapNode> Parent;
+ // Position of the current entry; starts at ui32(-1), i.e. before the first entry.
+ ui32 Index;
+ };
+ // Copies `count` entries from `items`, then sorts and de-duplicates them by key.
+ TMapNode(const TPair* items, ui32 count);
+ // Takes over the entries of `src`, leaving it empty.
+ TMapNode(TMapNode&& src);
+ ~TMapNode();
+ // Payload stored under `key`, or an empty value when the key is absent.
+ TUnboxedValue Lookup(const TStringRef& key) const;
+ ui64 GetDictLength() const final;
+ TUnboxedValue GetDictIterator() const final;
+ TUnboxedValue GetKeysIterator() const final;
+ TUnboxedValue GetPayloadsIterator() const final;
+ bool Contains(const TUnboxedValuePod& key) const final;
+ TUnboxedValue Lookup(const TUnboxedValuePod& key) const final;
+ bool HasDictItems() const final;
+ bool IsSortedDict() const final;
+ void* GetResource() final;
+ // Number of allocated entry slots; used to size the deallocation.
+ ui32 Count_;
+ // Number of entries remaining after duplicate keys were removed.
+ ui32 UniqueCount_;
+ // Entry array; the first UniqueCount_ slots hold the sorted entries.
+ TPair * Items_;
+// Node with attributes: the inherited map holds the attribute entries and
+// Value_ holds the wrapped node, exposed through GetVariantItem().
+class TAttrNode : public TMapNode {
+ // Takes the attribute entries from the TMapNode boxed in `map` (that map is moved from).
+ TAttrNode(const TUnboxedValue& map, NUdf::TUnboxedValue&& value);
+ // Builds the attribute entries from `count` elements of `items`.
+ TAttrNode(NUdf::TUnboxedValue&& value, const TPair* items, ui32 count);
+ NUdf::TUnboxedValue GetVariantItem() const final;
+ const NUdf::TUnboxedValue Value_;
+// Wraps `value` into an attribute node carrying `count` attribute entries from `items`.
+// With no attributes the value itself is returned, without any wrapper.
+inline TUnboxedValuePod MakeAttr(TUnboxedValue&& value, TPair* items, ui32 count) {
+ if (count != 0) {
+ return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(value), items, count)));
+ }
+ return value.Release();
+// Creates a string node through `valueBuilder`. No type tag is applied explicitly;
+// ENodeType::String is 0.
+inline TUnboxedValuePod MakeString(const TStringBuf value, const IValueBuilder* valueBuilder) {
+ return valueBuilder->NewString(value).Release();
+// Creates a boolean node.
+inline TUnboxedValuePod MakeBool(bool value) {
+ return SetNodeType<ENodeType::Bool>(TUnboxedValuePod(value));
+// Creates a signed 64-bit integer node.
+inline TUnboxedValuePod MakeInt64(i64 value) {
+ return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(value));
+// Creates an unsigned 64-bit integer node.
+inline TUnboxedValuePod MakeUint64(ui64 value) {
+ return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(value));
+// Creates a floating point node.
+inline TUnboxedValuePod MakeDouble(double value) {
+ return SetNodeType<ENodeType::Double>(TUnboxedValuePod(value));
+// Creates an entity node: a Zero value tagged as Entity.
+inline TUnboxedValuePod MakeEntity() {
+ return SetNodeType<ENodeType::Entity>(TUnboxedValuePod::Zero());
+// Creates a list node from `count` elements of `items`.
+// An empty list is represented by a Zero value that carries only the List tag.
+inline TUnboxedValuePod MakeList(TUnboxedValue* items, ui32 count, const IValueBuilder* valueBuilder) {
+ if (count > 0U)
+ return SetNodeType<ENodeType::List>(valueBuilder->NewList(items, count).Release());
+ return SetNodeType<ENodeType::List>(TUnboxedValuePod::Zero());
+// Creates a dict node from `count` entries of `items`.
+// An empty dict is represented by a Zero value that carries only the Dict tag.
+inline TUnboxedValuePod MakeDict(const TPair* items, ui32 count) {
+ if (count > 0U)
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod(new TMapNode(items, count)));
+ return SetNodeType<ENodeType::Dict>(TUnboxedValuePod::Zero());
+// Helper that prints a short description of a DOM node to an output stream.
+struct TDebugPrinter {
+ TDebugPrinter(const TUnboxedValuePod& node);
+ // Writes the description of the node to `o` and returns `o`.
+ class IOutputStream& Out(class IOutputStream &o) const;
+ // Reference to the described node; the node must outlive the printer.
+ const TUnboxedValuePod& Node;
+// Stream-output hook for TDebugPrinter: forwards to TDebugPrinter::Out.
+inline void Out<NYql::NDom::TDebugPrinter>(class IOutputStream &o, const NYql::NDom::TDebugPrinter& p) {
+ p.Out(o);
diff --git a/yql/essentials/minikql/dom/peel.cpp b/yql/essentials/minikql/dom/peel.cpp
new file mode 100644
index 0000000000..7508bfe6f8
--- /dev/null
+++ b/yql/essentials/minikql/dom/peel.cpp
@@ -0,0 +1,373 @@
+#include "peel.h"
+#include "node.h"
+#include "yson.h"
+#include "json.h"
+#include "convert.h"
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+namespace NYql::NDom {
+using namespace NUdf;
+namespace {
+// Converts a DOM node into a value of the data type identified by `nodeType`.
+// String, bool, integral and floating point targets are delegated to the ConvertTo* helpers
+// with the same Strict/AutoConvert flags; Yson and Json targets get the serialized node.
+// Any other data type terminates with an "Unsupported data type" message.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelData(const TDataTypeId nodeType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (nodeType) {
+ case TDataType<char*>::Id: return ConvertToString<Strict, AutoConvert, false>(value, valueBuilder, pos);
+ case TDataType<TUtf8>::Id: return ConvertToString<Strict, AutoConvert, true>(value, valueBuilder, pos);
+ case TDataType<bool>::Id: return ConvertToBool<Strict, AutoConvert>(value, valueBuilder, pos);
+ case TDataType<i8>::Id: return ConvertToIntegral<Strict, AutoConvert, i8>(value, valueBuilder, pos);
+ case TDataType<i16>::Id: return ConvertToIntegral<Strict, AutoConvert, i16>(value, valueBuilder, pos);
+ case TDataType<i32>::Id: return ConvertToIntegral<Strict, AutoConvert, i32>(value, valueBuilder, pos);
+ case TDataType<i64>::Id: return ConvertToIntegral<Strict, AutoConvert, i64>(value, valueBuilder, pos);
+ case TDataType<ui8>::Id: return ConvertToIntegral<Strict, AutoConvert, ui8>(value, valueBuilder, pos);
+ case TDataType<ui16>::Id: return ConvertToIntegral<Strict, AutoConvert, ui16>(value, valueBuilder, pos);
+ case TDataType<ui32>::Id: return ConvertToIntegral<Strict, AutoConvert, ui32>(value, valueBuilder, pos);
+ case TDataType<ui64>::Id: return ConvertToIntegral<Strict, AutoConvert, ui64>(value, valueBuilder, pos);
+ case TDataType<float>::Id: return ConvertToFloat<Strict, AutoConvert, float>(value, valueBuilder, pos);
+ case TDataType<double>::Id: return ConvertToFloat<Strict, AutoConvert, double>(value, valueBuilder, pos);
+ case TDataType<TYson>::Id: return valueBuilder->NewString(SerializeYsonDomToBinary(value)).Release();
+ case TDataType<TJson>::Id: return valueBuilder->NewString(SerializeJsonDom(value)).Release();
+ default: break;
+ }
+ UdfTerminate((::TStringBuilder() << "Unsupported data type: " << static_cast<int>(nodeType)).c_str());
+// Forward declaration: the container peelers below recurse into TryPeelDom for their items.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod TryPeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos);
+// Converts DOM node `x` into a list whose items have type `itemType`.
+// List nodes are converted item by item, attribute nodes are unwrapped first, and any other
+// node yields an empty list (AutoConvert), terminates (Strict) or returns an empty value.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelList(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::List: {
+ // An unboxed list node has no items: fall out of the switch to the empty list result.
+ if (!x.IsBoxed())
+ break;
+ // In Strict or AutoConvert mode the source list is wrapped so that PeelDom is applied per item.
+ // NOTE(review): TLazyConveter is defined outside this view; presumably it converts items on access.
+ if constexpr (Strict || AutoConvert) {
+ return TUnboxedValuePod(new TLazyConveter(x, std::bind(&PeelDom<Strict, AutoConvert>, typeHelper, itemType, std::placeholders::_1, valueBuilder, pos)));
+ }
+ // Eager path: convert every item now and keep only those that converted successfully.
+ // NOTE(review): when Strict is true the function has already returned above, so the
+ // Strict branches inside the loops below look unreachable at runtime.
+ TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator> values;
+ if (const auto elements = x.GetElements()) {
+ const auto size = x.GetListLength();
+ values.reserve(size);
+ for (ui32 i = 0U; i < size; ++i) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, elements[i], valueBuilder, pos))
+ values.emplace_back(item.GetOptionalValue());
+ else if constexpr (Strict)
+ UdfTerminate("Error on convert list item.");
+ }
+ } else {
+ // No direct element access: walk the list through its iterator instead.
+ const auto it = x.GetListIterator();
+ for (TUnboxedValue v; it.Next(v);) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, v, valueBuilder, pos))
+ values.emplace_back(item.GetOptionalValue());
+ else if constexpr (Strict)
+ UdfTerminate("Error on convert list item.");
+ }
+ }
+ if (values.empty()) {
+ break;
+ }
+ return valueBuilder->NewList(values.data(), values.size()).Release();
+ }
+ case ENodeType::Attr:
+ // Attributes are ignored: peel the wrapped value.
+ return PeelList<Strict, AutoConvert>(typeHelper, itemType, x.GetVariantItem().Release(), valueBuilder, pos);
+ default:
+ if constexpr (AutoConvert)
+ break;
+ else if constexpr (Strict)
+ UdfTerminate("Cannot parse list from entity, scalar value or dict.");
+ else
+ return {};
+ }
+ return valueBuilder->NewEmptyList().Release();
+// Converts DOM node `x` into a dict whose payloads have type `itemType`.
+// Utf8Keys selects whether keys are converted as Utf8 or as plain strings.
+// Dict nodes are converted entry by entry, attribute nodes are unwrapped first, and any other
+// node yields an empty container (AutoConvert), terminates (Strict) or returns an empty value.
+template<bool Strict, bool AutoConvert, bool Utf8Keys>
+TUnboxedValuePod PeelDict(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (GetNodeType(x)) {
+ case ENodeType::Dict:
+ // An unboxed dict node has no entries: fall out of the switch to the empty result.
+ if (!x.IsBoxed())
+ break;
+ // With plain string keys, Strict/AutoConvert modes wrap the source dict so that PeelDom is
+ // applied per payload. Utf8 keys always take the eager path below, where keys are converted too.
+ // NOTE(review): TLazyConveter is defined outside this view; presumably it converts on access.
+ if constexpr (!Utf8Keys && (Strict || AutoConvert)) {
+ return TUnboxedValuePod(new TLazyConveter(x, std::bind(&PeelDom<Strict, AutoConvert>, typeHelper, itemType, std::placeholders::_1, valueBuilder, pos)));
+ }
+ if (const auto size = x.GetDictLength()) {
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs;
+ pairs.reserve(size);
+ const auto it = x.GetDictIterator();
+ for (TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ // An entry is kept only when both its key and its payload convert successfully.
+ if (const auto k = ConvertToString<Strict, AutoConvert, Utf8Keys>(key.Release(), valueBuilder, pos)) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, payload, valueBuilder, pos)) {
+ pairs.emplace_back(std::move(k), item.GetOptionalValue());
+ continue;
+ }
+ }
+ // Reached only for a failed entry: fatal in Strict mode, silently skipped otherwise.
+ if constexpr (Strict)
+ UdfTerminate("Error on convert dict payload.");
+ }
+ if (pairs.empty()) {
+ break;
+ }
+ return TUnboxedValuePod(new TMapNode(pairs.data(), pairs.size()));
+ }
+ break;
+ case ENodeType::Attr:
+ // Attributes are ignored: peel the wrapped value.
+ return PeelDict<Strict, AutoConvert, Utf8Keys>(typeHelper, itemType, x.GetVariantItem().Release(), valueBuilder, pos);
+ default:
+ if constexpr (AutoConvert)
+ break;
+ else if constexpr (Strict)
+ UdfTerminate("Cannot parse dict from entity, scalar value or list.");
+ else
+ return {};
+ }
+ // NOTE(review): the empty-dict result is produced with NewEmptyList(), as MakeStub also does
+ // for Dict shapes; presumably the empty container serves both roles - confirm.
+ return valueBuilder->NewEmptyList().Release();
+// Produces a placeholder value for type `shape`; the tuple/struct peelers use it in AutoConvert
+// mode for elements that are missing from the source node.
+// Optionals become an empty value, scalar data becomes Zero, Yson/Json data becomes the text of
+// an empty node ("#" / "null"), tuples and structs are filled recursively with stubs, lists and
+// dicts become an empty list, and a DOM node resource becomes an entity node.
+// Any other data type or kind terminates with an "Unsupported ..." message.
+TUnboxedValuePod MakeStub(const ITypeInfoHelper* typeHelper, const TType* shape, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (const auto kind = typeHelper->GetTypeKind(shape)) {
+ case ETypeKind::Optional:
+ return TUnboxedValuePod();
+ case ETypeKind::Data:
+ switch (const auto nodeType = TDataTypeInspector(*typeHelper, shape).GetTypeId()) {
+ case TDataType<char*>::Id:
+ case TDataType<TUtf8>::Id:
+ case TDataType<bool>::Id:
+ case TDataType<i8>::Id:
+ case TDataType<i16>::Id:
+ case TDataType<i32>::Id:
+ case TDataType<i64>::Id:
+ case TDataType<ui8>::Id:
+ case TDataType<ui16>::Id:
+ case TDataType<ui32>::Id:
+ case TDataType<ui64>::Id:
+ case TDataType<float>::Id:
+ case TDataType<double>::Id:
+ case TDataType<TDecimal>::Id:
+ return TUnboxedValuePod::Zero();
+ case TDataType<TYson>::Id:
+ return TUnboxedValuePod::Embedded("#");
+ case TDataType<TJson>::Id:
+ return TUnboxedValuePod::Embedded("null");
+ default:
+ UdfTerminate((::TStringBuilder() << "Unsupported data type: " << static_cast<int>(nodeType)).c_str());
+ }
+ case ETypeKind::Tuple:
+ if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) {
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(count, items);
+ // Fill the array from the last element to the first, counting `count` down to zero.
+ items += count;
+ do *--items = MakeStub(typeHelper, tupleTypeInspector.GetElementType(--count), valueBuilder, pos);
+ while (count);
+ return result.Release();
+ }
+ // A tuple without elements is represented by an empty list.
+ return valueBuilder->NewEmptyList().Release();
+ case ETypeKind::Struct:
+ if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) {
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(count, items);
+ // Same back-to-front fill as for tuples.
+ items += count;
+ do *--items = MakeStub(typeHelper, structTypeInspector.GetMemberType(--count), valueBuilder, pos);
+ while (count);
+ return result.Release();
+ }
+ // A struct without members is represented by an empty list.
+ return valueBuilder->NewEmptyList().Release();
+ case ETypeKind::List:
+ case ETypeKind::Dict:
+ return valueBuilder->NewEmptyList().Release();
+ case ETypeKind::Resource:
+ if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName)
+ return MakeEntity();
+ [[fallthrough]];
+ default:
+ UdfTerminate((::TStringBuilder() << "Unsupported data kind: " << kind).c_str());
+ }
+// Converts DOM node `x` into a tuple of type `shape`.
+// A list node supplies the elements positionally; elements beyond the end of the list are
+// stubbed (AutoConvert), left empty when their type is Optional, or treated as an error.
+// Attribute nodes are unwrapped first. Any other node gives a tuple of stubs (AutoConvert),
+// terminates (Strict) or returns an empty value. A tuple type without elements also returns
+// an empty value.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelTuple(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) {
+ switch (GetNodeType(x)) {
+ case ENodeType::List: {
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(count, items);
+ // `i` is the index of the next tuple element; `count` is the number of elements still unfilled.
+ ui32 i = 0U;
+ if (x.IsBoxed()) {
+ if (auto elements = x.GetElements()) {
+ // Consume list items while both the list and the tuple have elements left.
+ for (auto size = x.GetListLength(); count && size--; --count) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, tupleTypeInspector.GetElementType(i++), *elements++, valueBuilder, pos))
+ *items++ = item.GetOptionalValue();
+ else if constexpr (Strict)
+ UdfTerminate("Error on convert tuple item.");
+ else
+ return {};
+ }
+ } else if (const auto it = x.GetListIterator()) {
+ // No direct element access: walk the list through its iterator instead.
+ for (TUnboxedValue v; count && it.Next(v); --count) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, tupleTypeInspector.GetElementType(i++), v, valueBuilder, pos))
+ *items++ = item.GetOptionalValue();
+ else if constexpr (Strict)
+ UdfTerminate("Error on convert tuple item.");
+ else
+ return {};
+ }
+ }
+ }
+ // The list was shorter than the tuple: handle each of the remaining `count` elements.
+ if (count) do
+ if constexpr (AutoConvert)
+ *items++ = MakeStub(typeHelper, tupleTypeInspector.GetElementType(i++), valueBuilder, pos);
+ else if (ETypeKind::Optional == typeHelper->GetTypeKind(tupleTypeInspector.GetElementType(i++)))
+ // Optional element: skip the slot, leaving it as NewArray provided it.
+ ++items;
+ else if constexpr (Strict)
+ UdfTerminate((::TStringBuilder() << "DOM list has less items then " << tupleTypeInspector.GetElementsCount() << " tuple elements.").c_str());
+ else
+ return {};
+ while (--count);
+ return result.Release();
+ }
+ case ENodeType::Attr:
+ // Attributes are ignored: peel the wrapped value.
+ return PeelTuple<Strict, AutoConvert>(typeHelper, shape, x.GetVariantItem().Release(), valueBuilder, pos);
+ default:
+ if constexpr (AutoConvert) {
+ // Not a list at all: stub every non-optional element and skip the optional slots.
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(count, items);
+ for (ui32 i = 0ULL; i < count; ++i)
+ if (ETypeKind::Optional != typeHelper->GetTypeKind(tupleTypeInspector.GetElementType(i)))
+ *items++ = MakeStub(typeHelper, tupleTypeInspector.GetElementType(i), valueBuilder, pos);
+ else
+ ++items;
+ return result.Release();
+ } else if constexpr (Strict)
+ UdfTerminate("Cannot parse tuple from entity, scalar value or dict.");
+ else
+ break;
+ }
+ }
+ return {};
+// Converts DOM node `x` into a struct of type `shape`.
+// A dict node supplies the members by name; members that are absent are stubbed (AutoConvert),
+// left empty when their type is Optional, or treated as an error.
+// Attribute nodes are unwrapped first. Any other node gives a struct of stubs (AutoConvert),
+// terminates (Strict) or returns an empty value.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelStruct(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape)) {
+ const auto size = structTypeInspector.GetMembersCount();
+ switch (GetNodeType(x)) {
+ case ENodeType::Dict: {
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(size, items);
+ for (ui32 i = 0ULL; i < size; ++i) {
+ // An unboxed dict node has no entries, so lookups are attempted on boxed nodes only.
+ if (x.IsBoxed()) {
+ if (const auto v = x.Lookup(valueBuilder->NewString(structTypeInspector.GetMemberName(i)))) {
+ if (const auto item = TryPeelDom<Strict, AutoConvert>(typeHelper, structTypeInspector.GetMemberType(i), v.GetOptionalValue(), valueBuilder, pos))
+ *items++ = item.GetOptionalValue();
+ else if constexpr (Strict)
+ UdfTerminate((::TStringBuilder() << "Error on convert struct member '" << structTypeInspector.GetMemberName(i) << "'.").c_str());
+ else
+ return {};
+ continue;
+ }
+ }
+ // The member is absent from the dict.
+ if constexpr (AutoConvert)
+ *items++ = MakeStub(typeHelper, structTypeInspector.GetMemberType(i), valueBuilder, pos);
+ else if (ETypeKind::Optional == typeHelper->GetTypeKind(structTypeInspector.GetMemberType(i)))
+ // Optional member: skip the slot, leaving it as NewArray provided it.
+ ++items;
+ else if constexpr (Strict)
+ UdfTerminate((::TStringBuilder() << "Missed struct member '" << structTypeInspector.GetMemberName(i) << "'.").c_str());
+ else
+ return {};
+ }
+ return result.Release();
+ }
+ case ENodeType::Attr:
+ // Attributes are ignored: peel the wrapped value.
+ return PeelStruct<Strict, AutoConvert>(typeHelper, shape, x.GetVariantItem().Release(), valueBuilder, pos);
+ default:
+ if constexpr (AutoConvert) {
+ // Not a dict at all: stub every non-optional member and skip the optional slots.
+ TUnboxedValue* items = nullptr;
+ auto result = valueBuilder->NewArray(size, items);
+ for (ui32 i = 0ULL; i < size; ++i)
+ if (ETypeKind::Optional != typeHelper->GetTypeKind(structTypeInspector.GetMemberType(i)))
+ *items++ = MakeStub(typeHelper, structTypeInspector.GetMemberType(i), valueBuilder, pos);
+ else
+ ++items;
+ return result.Release();
+ } else if constexpr (Strict)
+ UdfTerminate("Cannot parse struct from entity, scalar value or list.");
+ else
+ break;
+ }
+ }
+ return {};
+// Converts DOM node `value` into an optional of `itemType`.
+// An entity node maps to an empty value passed through MakeOptional(). Otherwise the item
+// conversion result is returned as is when it succeeded, and also unconditionally in
+// AutoConvert mode. A failed conversion terminates in Strict mode and yields the same
+// empty-optional value in the remaining mode.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelOptional(const ITypeInfoHelper* typeHelper, const TType* itemType, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ if (IsNodeType<ENodeType::Entity>(value))
+ return TUnboxedValuePod().MakeOptional();
+ if (const auto result = TryPeelDom<Strict, AutoConvert>(typeHelper, itemType, value, valueBuilder, pos); AutoConvert || result)
+ return result;
+ else if constexpr (Strict)
+ UdfTerminate("Failed to convert Yson DOM.");
+ else
+ return TUnboxedValuePod().MakeOptional();
+// Converts DOM node `value` into a value of type `shape`, dispatching on the type kind.
+// Data, optional, list, dict, tuple and struct shapes are delegated to the matching Peel*
+// helper with the same Strict/AutoConvert flags. Dict shapes must be keyed by String (char*)
+// or Utf8 data. A resource tagged as a DOM node is returned unchanged.
+// Every other kind, key kind or key type terminates with an "Unsupported ..." message.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod TryPeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ switch (const auto kind = typeHelper->GetTypeKind(shape)) {
+ case ETypeKind::Data:
+ return PeelData<Strict, AutoConvert>(TDataTypeInspector(*typeHelper, shape).GetTypeId(), value, valueBuilder, pos);
+ case ETypeKind::Optional:
+ return PeelOptional<Strict, AutoConvert>(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder, pos);
+ case ETypeKind::List:
+ return PeelList<Strict, AutoConvert>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType(), value, valueBuilder, pos);
+ case ETypeKind::Dict: {
+ const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape);
+ const auto keyType = dictTypeInspector.GetKeyType();
+ // The key type selects the Utf8Keys flavour of PeelDict.
+ if (const auto keyKind = typeHelper->GetTypeKind(keyType); ETypeKind::Data == keyKind)
+ switch (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId()) {
+ case TDataType<char*>::Id: return PeelDict<Strict, AutoConvert, false>(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder, pos);
+ case TDataType<TUtf8>::Id: return PeelDict<Strict, AutoConvert, true>(typeHelper, dictTypeInspector.GetValueType(), value, valueBuilder, pos);
+ default: UdfTerminate((::TStringBuilder() << "Unsupported dict key type: " << keyId).c_str());
+ }
+ else
+ UdfTerminate((::TStringBuilder() << "Unsupported dict key kind: " << keyKind).c_str());
+ }
+ case ETypeKind::Tuple:
+ return PeelTuple<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos);
+ case ETypeKind::Struct:
+ return PeelStruct<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos);
+ case ETypeKind::Resource:
+ if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName)
+ return value;
+ // Any other resource is unsupported: the fall-through into the default case is
+ // intentional and is marked the same way as in MakeStub.
+ [[fallthrough]];
+ default:
+ UdfTerminate((::TStringBuilder() << "Unsupported data kind: " << kind).c_str());
+ }
+// Public entry point: converts DOM node `value` into a value of type `shape`.
+// Terminates with a message naming the target type when the conversion yields no value.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod PeelDom(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+ const auto peeled = TryPeelDom<Strict, AutoConvert>(typeHelper, shape, value, valueBuilder, pos);
+ if (!peeled) {
+ ::TStringBuilder message;
+ message << "Failed to convert Yson DOM into strict type: ";
+ TTypePrinter(*typeHelper, shape).Out(message.Out);
+ UdfTerminate(message.c_str());
+ }
+ return peeled.GetOptionalValue();
+// Explicit instantiations of PeelDom for all four Strict/AutoConvert combinations
+// (peel.h only declares the template, so its definition is provided from here).
+template TUnboxedValuePod PeelDom<true, true>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos);
+template TUnboxedValuePod PeelDom<false, true>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos);
+template TUnboxedValuePod PeelDom<true, false>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos);
+template TUnboxedValuePod PeelDom<false, false>(const ITypeInfoHelper* typeHelper, const TType* shape, const TUnboxedValuePod value, const IValueBuilder* valueBuilder, const TSourcePosition& pos);
diff --git a/yql/essentials/minikql/dom/peel.h b/yql/essentials/minikql/dom/peel.h
new file mode 100644
index 0000000000..6e0dab1002
--- /dev/null
+++ b/yql/essentials/minikql/dom/peel.h
@@ -0,0 +1,11 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_types.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+namespace NYql::NDom {
+// Converts DOM node `value` into a value of type `shape`.
+// Strict and AutoConvert select how conversion failures and type mismatches are handled.
+template<bool Strict, bool AutoConvert>
+NUdf::TUnboxedValuePod PeelDom(const NUdf::ITypeInfoHelper* typeHelper, const NUdf::TType* shape, const NUdf::TUnboxedValuePod value, const NUdf::IValueBuilder* valueBuilder, const NUdf::TSourcePosition& pos);
diff --git a/yql/essentials/minikql/dom/ut/json_ut.cpp b/yql/essentials/minikql/dom/ut/json_ut.cpp
new file mode 100644
index 0000000000..7184f3507f
--- /dev/null
+++ b/yql/essentials/minikql/dom/ut/json_ut.cpp
@@ -0,0 +1,2028 @@
+#include <yql/essentials/minikql/dom/json.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <contrib/ydb/library/yql/minikql/mkql_alloc.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h>
+using namespace NYql;
+using namespace NYql::NDom;
+using namespace NKikimr;
+constexpr char json[] =
+ "Fullname": [
+ {
+ "freqs": {
+ "sum_qf@de": 28,
+ "sum_qf@en": 8,
+ "sum_qf@ru": 10060,
+ "sum_qf@tr": 91,
+ "sum_qf@uk": 245,
+ "sum_qf@uz": 6
+ },
+ "src": [
+ {
+ "c": "ltr"
+ }
+ ],
+ "value": "Татьяна Сорокина"
+ }
+ ],
+ "Gender": [
+ {
+ "src": [
+ {
+ "c": "yam",
+ "is_guessed": "True"
+ },
+ {
+ "c": "scm",
+ "is_guessed": "True"
+ },
+ {
+ "c": "ltr",
+ "is_guessed": "True"
+ },
+ {
+ "c": "lbr",
+ "is_guessed": "True"
+ }
+ ],
+ "value": "female"
+ }
+ ],
+ "Image": [
+ {
+ "RelevLocale": [
+ "universe"
+ ],
+ "avatar_type": "face",
+ "color_wiz": {
+ "back": "#DBC4B5",
+ "button": "#BFAC9E",
+ "button_text": "#23211E",
+ "text": "#705549"
+ },
+ "faces_count": 1,
+ "langua": [
+ "uk",
+ "by",
+ "kk",
+ "ru"
+ ],
+ "mds_avatar_id": "2001742/402534297",
+ "original_size": {
+ "height": 1478,
+ "width": 1478
+ },
+ "show_on_serp": true,
+ "src": [
+ {
+ "url": "http://music.yandex.ru/artist/7945920",
+ "url_type": "page",
+ "value": "yam"
+ }
+ ],
+ "thumb": "Face",
+ "type": "image",
+ "url": "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig",
+ "value": "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig"
+ }
+ ],
+ "ImageSearchRequest": [
+ {
+ "RelevLocale": [
+ "ru",
+ "by"
+ ],
+ "value": "Сорокина Татьяна фото"
+ }
+ ],
+ "Key": [
+ {
+ "langua": [
+ "ru"
+ ],
+ "predict": "972",
+ "rank": 0,
+ "src": [
+ {
+ "c": "rut"
+ }
+ ],
+ "value": "sorokina tatyana"
+ },
+ {
+ "freqs": {
+ "sum_qf@de": 3,
+ "sum_qf@en": 2,
+ "sum_qf@ru": 11504,
+ "sum_qf@tr": 35,
+ "sum_qf@uk": 145,
+ "sum_qf@uz": 1
+ },
+ "langua": [
+ "ru"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ },
+ {
+ "c": "ltr"
+ },
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "сорокина татьяна"
+ },
+ {
+ "langua": [
+ "ru"
+ ],
+ "predict": "931",
+ "rank": 1,
+ "src": [
+ {
+ "c": "rut"
+ }
+ ],
+ "value": "tatiana sorokina"
+ },
+ {
+ "langua": [
+ "ru"
+ ],
+ "predict": "951",
+ "rank": 0,
+ "src": [
+ {
+ "c": "rut"
+ }
+ ],
+ "value": "tatyana sorokina"
+ },
+ {
+ "freqs": {
+ "SenseRatio": 0.01,
+ "SenseRatio@de": 0.5,
+ "SenseRatio@en": 0.5,
+ "SenseRatio@et": 0.5,
+ "SenseRatio@fi": 0.5,
+ "SenseRatio@id": 0.5,
+ "SenseRatio@kk": 0.5,
+ "SenseRatio@lt": 0.5,
+ "SenseRatio@lv": 0.5,
+ "SenseRatio@pl": 0.5,
+ "SenseRatio@ru": 0,
+ "SenseRatio@tr": 0.5,
+ "SenseRatio@uk": 0.5,
+ "SenseRatio@uz": 0.5,
+ "sum_qf@de": 28,
+ "sum_qf@en": 8,
+ "sum_qf@ru": 10060,
+ "sum_qf@tr": 91,
+ "sum_qf@uk": 245,
+ "sum_qf@uz": 6
+ },
+ "langua": [
+ "ru"
+ ],
+ "src": [
+ {
+ "c": "scm",
+ "name": "bookmate.com"
+ },
+ {
+ "c": "yam"
+ },
+ {
+ "c": "ltr"
+ }
+ ],
+ "value": "татьяна сорокина"
+ }
+ ],
+ "Projects": [
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 256643
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ },
+ {
+ "Role": [
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam08459289|Сто дорог, одна – моя]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 54092
+ },
+ "hint_description": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "tr",
+ "by"
+ ],
+ "value": "2019"
+ }
+ ],
+ "otype": "Music/Album@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam08459289|Сто дорог, одна – моя]]"
+ },
+ {
+ "Role": "Author@on",
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm540668de..0|История медицины]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 49611
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm540668de..0|История медицины]]"
+ },
+ {
+ "Role": "Author@on",
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm-3f1fcad4..0|Мыколка]]"
+ }
+ ],
+ "report": "False",
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm-3f1fcad4..0|Мыколка]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21522
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "hint_description": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "tr",
+ "by"
+ ],
+ "value": "2015"
+ }
+ ],
+ "report": "False",
+ "src": [
+ {
+ "c": "ltr"
+ },
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#ltr08920335]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrbs464788|Пейп-арт]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 12676
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#lbrbs464788]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrbs137089|Филиальная сеть: развитие и управление]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#lbrbs137089]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb464788|Пейп-арт]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 12676
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb464788|Пейп-арт]]"
+ },
+ {
+ "Role": [
+ "Artist@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb274279|Что сначала,что потом?]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 15
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb274279|Что сначала,что потом?]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21
+ },
+ "report": "False",
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 1431963
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 34699
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845756|Я – мама]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 441
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845756|Я – мама]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 152646
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 16331217
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 94
+ },
+ "otype": "Music/Recording@on",
+ "report": "False",
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ },
+ {
+ "Role": "Author@on",
+ "carousel": "False",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "report": "False",
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "SearchRequest": [
+ {
+ "RelevLocale": [
+ "ru",
+ "by"
+ ],
+ "value": "Сорокина Татьяна"
+ }
+ ],
+ "Title": [
+ {
+ "freqs": {
+ "sum_qf@de": 3,
+ "sum_qf@en": 2,
+ "sum_qf@ru": 11504,
+ "sum_qf@tr": 35,
+ "sum_qf@uk": 145,
+ "sum_qf@uz": 1
+ },
+ "langua": [
+ "ru"
+ ],
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "Сорокина Татьяна"
+ },
+ {
+ "RelevLocale": [
+ "kz",
+ "ua",
+ "by",
+ "ru"
+ ],
+ "freqs": {
+ "sum_qf@de": 28,
+ "sum_qf@en": 8,
+ "sum_qf@ru": 10060,
+ "sum_qf@tr": 91,
+ "sum_qf@uk": 245,
+ "sum_qf@uz": 6
+ },
+ "langua": [
+ "ru"
+ ],
+ "src": [
+ {
+ "c": "scm",
+ "name": "bookmate.com"
+ },
+ {
+ "c": "yam"
+ },
+ {
+ "c": "ltr"
+ }
+ ],
+ "value": "Татьяна Сорокина"
+ }
+ ],
+ "TopTracks": [
+ {
+ "Position": 9,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ },
+ {
+ "Position": 8,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ },
+ {
+ "Position": 7,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ },
+ {
+ "Position": 6,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ },
+ {
+ "Position": 5,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ },
+ {
+ "Position": 4,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ },
+ {
+ "Position": 3,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ },
+ {
+ "Position": 2,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ },
+ {
+ "Position": 1,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ },
+ {
+ "Position": 0,
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ }
+ ],
+ "langua": [
+ "uk",
+ "ru",
+ "kk",
+ "by"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ }
+ ],
+ "freqs": {
+ "average_proper_ratio": [
+ "1.00"
+ ],
+ "proper_ratio": [
+ {
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "1.00"
+ }
+ ],
+ "sum_qf@de": [
+ "31"
+ ],
+ "sum_qf@en": [
+ "10"
+ ],
+ "sum_qf@ru": [
+ "21572"
+ ],
+ "sum_qf@tr": [
+ "126"
+ ],
+ "sum_qf@uk": [
+ "390"
+ ],
+ "sum_qf@uz": [
+ "7"
+ ]
+ },
+ "fullname": [
+ {
+ "freqs": {
+ "sum_qf@de": 28,
+ "sum_qf@en": 8,
+ "sum_qf@ru": 10060,
+ "sum_qf@tr": 91,
+ "sum_qf@uk": 245,
+ "sum_qf@uz": 6
+ },
+ "rfr": [
+ "[[#rfr21731b2]]"
+ ],
+ "src": [
+ {
+ "c": "ltr"
+ }
+ ],
+ "value": "Татьяна Сорокина"
+ }
+ ],
+ "human_gender": [
+ {
+ "rfr": [
+ "[[#rfr21f0d779]]"
+ ],
+ "src": [
+ {
+ "c": "yam",
+ "is_guessed": "True"
+ },
+ {
+ "c": "scm",
+ "is_guessed": "True"
+ },
+ {
+ "c": "ltr",
+ "is_guessed": "True"
+ },
+ {
+ "c": "lbr",
+ "is_guessed": "True"
+ }
+ ],
+ "value": "female"
+ }
+ ],
+ "ids": [
+ {
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "http://music.yandex.ru/artist/7945920"
+ },
+ {
+ "src": [
+ {
+ "c": "ltr"
+ }
+ ],
+ "value": "https://www.litres.ru/4815845"
+ },
+ {
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "http://www.labirint.ru/authors/43298"
+ }
+ ],
+ "isa": {
+ "Wtype": "Hum",
+ "otype": [
+ {
+ "src": [
+ {
+ "c": "yam"
+ },
+ {
+ "c": "scm"
+ },
+ {
+ "c": "ltr"
+ },
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "Hum"
+ }
+ ]
+ },
+ "merged_ontoids": [
+ "ltr24815845",
+ "scmbookmatecomh7b363cfd07a49aed419fde3dbd010f64",
+ "lbrh43298",
+ "yam17945920"
+ ],
+ "musical_artist_groups": [
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 256643
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845750|Наши дети]]"
+ },
+ {
+ "Role": [
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam08459289|Сто дорог, одна – моя]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 54092
+ },
+ "hint_description": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "tr",
+ "by"
+ ],
+ "value": "2019"
+ }
+ ],
+ "otype": "Music/Album@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam08459289|Сто дорог, одна – моя]]"
+ },
+ {
+ "Role": "Author@on",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm540668de..0|История медицины]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 49611
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm540668de..0|История медицины]]"
+ },
+ {
+ "Role": "Author@on",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm-3f1fcad4..0|Мыколка]]"
+ }
+ ],
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm-3f1fcad4..0|Мыколка]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21522
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845751|100 дорог]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "hint_description": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "tr",
+ "by"
+ ],
+ "value": "2015"
+ }
+ ],
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "ltr"
+ },
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#ltr08920335]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrbs464788|Пейп-арт]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 12676
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#lbrbs464788]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrbs137089|Филиальная сеть: развитие и управление]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr"
+ }
+ ],
+ "value": "[[#lbrbs137089]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb464788|Пейп-арт]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 12676
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb464788|Пейп-арт]]"
+ },
+ {
+ "Role": [
+ "Artist@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb274279|Что сначала,что потом?]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 15
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb274279|Что сначала,что потом?]]"
+ },
+ {
+ "Role": [
+ "Author@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 21
+ },
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "lbr",
+ "f": "book"
+ }
+ ],
+ "value": "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845752|Храни его]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 1431963
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845753|Удача]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 34699
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845754|Матушка Россия]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845755|Нежданное свидание]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845756|Я – мама]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 441
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845756|Я – мама]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ }
+ ],
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845757|Глупый сон]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 152646
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845760|Спасибо вам]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 16331217
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845758|С Днём рождения]]"
+ },
+ {
+ "Role": [
+ "Performer@on"
+ ],
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ],
+ "freqs": {
+ "sum_qf": 94
+ },
+ "otype": "Music/Recording@on",
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "yam"
+ }
+ ],
+ "value": "[[#yam356845759|Песенка о мечтах]]"
+ },
+ {
+ "Role": "Author@on",
+ "formatted": [
+ {
+ "RelevLocale": [
+ "ru",
+ "ua",
+ "by",
+ "kz"
+ ],
+ "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ],
+ "rfr": [
+ "[[#rfr110390d1]]"
+ ],
+ "src": [
+ {
+ "c": "scm"
+ }
+ ],
+ "value": "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ]
+constexpr auto Steps = 10000U; // Iteration count of the TestPerf* loops; also the size of their result arrays.
+ Y_UNIT_TEST(TestValidate) { // IsValidJson must accept the `json` fixture and reject every malformed input below.
+ UNIT_ASSERT(IsValidJson(json));
+ UNIT_ASSERT(!IsValidJson("[123}")); // '[' closed by '}'
+ UNIT_ASSERT(!IsValidJson("[123],[456]")); // two arrays separated by a comma at top level
+ UNIT_ASSERT(!IsValidJson(R"({"c" : "scm"])")); // '{' closed by ']'
+ UNIT_ASSERT(!IsValidJson("")); // empty input
+ UNIT_ASSERT(!IsValidJson(R"({"c",})")); // key without a value, followed by a trailing comma
+ UNIT_ASSERT(!IsValidJson(R"({null : "scm"})")); // bare null used as an object key
+ UNIT_ASSERT(!IsValidJson(R"({'one': 1})")); // single-quoted key
+ }
+ Y_UNIT_TEST(TestPerfValidate) {
+     // Runs IsValidJson over the `json` fixture Steps times, asserting success
+     // on every pass, then prints the time spent in the loop to Cerr.
+     const TInstant startedAt = TInstant::Now();
+     for (auto left = Steps; left != 0U; --left) {
+         UNIT_ASSERT(IsValidJson(json));
+     }
+     const auto elapsed = TInstant::Now() - startedAt;
+     Cerr << "Time is " << elapsed << Endl;
+ }
+ Y_UNIT_TEST(TestPerfParse) {
+     NMiniKQL::TScopedAlloc scopedAlloc(__LOCATION__);
+     NMiniKQL::TMemoryUsageInfo usage("Memory");
+     NMiniKQL::THolderFactory factory(scopedAlloc.Ref(), usage, nullptr);
+     NMiniKQL::TDefaultValueBuilder builder(factory);
+     // One slot per iteration: each parsed value is stored in the array
+     // instead of being discarded inside the timed loop.
+     std::array<NUdf::TUnboxedValue, Steps> parsed;
+     const TInstant startedAt = TInstant::Now();
+     for (NUdf::TUnboxedValue& i : parsed) {
+         // Parse the `json` fixture into the slot and assert that the stored
+         // value converts to true.
+         UNIT_ASSERT(i = TryParseJsonDom(json, &builder));
+     }
+     const auto elapsed = TInstant::Now() - startedAt;
+     Cerr << "Time is " << elapsed << Endl;
+ }
+ Y_UNIT_TEST(TestPerfSerialize) {
+     NMiniKQL::TScopedAlloc scopedAlloc(__LOCATION__);
+     NMiniKQL::TMemoryUsageInfo usage("Memory");
+     NMiniKQL::THolderFactory factory(scopedAlloc.Ref(), usage, nullptr);
+     NMiniKQL::TDefaultValueBuilder builder(factory);
+     // The fixture is parsed once, outside the timed section; only the
+     // serialization (plus wrapping the text via NewString) is measured.
+     const auto dom = TryParseJsonDom(json, &builder);
+     std::array<NUdf::TUnboxedValue, Steps> serialized;
+     const TInstant startedAt = TInstant::Now();
+     for (NUdf::TUnboxedValue& i : serialized) {
+         // Serialize the DOM, store the resulting string value in the slot
+         // and assert that it converts to true.
+         UNIT_ASSERT(i = builder.NewString(SerializeJsonDom(dom)));
+     }
+     const auto elapsed = TInstant::Now() - startedAt;
+     Cerr << "Time is " << elapsed << Endl;
+ }
diff --git a/yql/essentials/minikql/dom/ut/ya.make b/yql/essentials/minikql/dom/ut/ya.make
new file mode 100644
index 0000000000..da77c16342
--- /dev/null
+++ b/yql/essentials/minikql/dom/ut/ya.make
@@ -0,0 +1,20 @@
+ UNITTEST_FOR(yql/essentials/minikql/dom)
+ yson_ut.cpp
+ json_ut.cpp
+ )
+ contrib/ydb/library/yql/minikql/computation/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ contrib/ydb/library/yql/sql/pg_dummy
+ )
+ END()
diff --git a/yql/essentials/minikql/dom/ut/yson_ut.cpp b/yql/essentials/minikql/dom/ut/yson_ut.cpp
new file mode 100644
index 0000000000..a91fb70bf2
--- /dev/null
+++ b/yql/essentials/minikql/dom/ut/yson_ut.cpp
@@ -0,0 +1,2087 @@
+#include <yql/essentials/minikql/dom/yson.h>
+#include <yql/essentials/minikql/dom/json.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <contrib/ydb/library/yql/minikql/mkql_alloc.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h>
+using namespace NYql;
+using namespace NYql::NDom;
+using namespace NKikimr;
+constexpr char yson[] =
+ "Fullname" = [
+ {
+ "freqs" = {
+ "sum_qf@de" = 28;
+ "sum_qf@en" = 8;
+ "sum_qf@ru" = 10060;
+ "sum_qf@tr" = 91;
+ "sum_qf@uk" = 245;
+ "sum_qf@uz" = 6
+ };
+ "src" = [
+ {
+ "c" = "ltr"
+ }
+ ];
+ "value" = "Татьяна Сорокина"
+ }
+ ];
+ "Gender" = [
+ {
+ "src" = [
+ {
+ "c" = "yam";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "scm";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "ltr";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "lbr";
+ "is_guessed" = "True"
+ }
+ ];
+ "value" = "female"
+ }
+ ];
+ "Image" = [
+ {
+ "RelevLocale" = [
+ "universe"
+ ];
+ "avatar_type" = "face";
+ "color_wiz" = {
+ "back" = "#DBC4B5";
+ "button" = "#BFAC9E";
+ "button_text" = "#23211E";
+ "text" = "#705549"
+ };
+ "faces_count" = 1;
+ "langua" = [
+ "uk";
+ "by";
+ "kk";
+ "ru"
+ ];
+ "mds_avatar_id" = "2001742/402534297";
+ "original_size" = {
+ "height" = 1478;
+ "width" = 1478
+ };
+ "show_on_serp" = %true;
+ "src" = [
+ {
+ "url" = "http://music.yandex.ru/artist/7945920";
+ "url_type" = "page";
+ "value" = "yam"
+ }
+ ];
+ "thumb" = "Face";
+ "type" = "image";
+ "url" = "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig";
+ "value" = "//avatars.yandex.net/get-music-content/113160/26f40ebf.a.8459289-1/orig"
+ }
+ ];
+ "ImageSearchRequest" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "by"
+ ];
+ "value" = "Сорокина Татьяна фото"
+ }
+ ];
+ "Key" = [
+ {
+ "langua" = [
+ "ru"
+ ];
+ "predict" = "972";
+ "rank" = 0;
+ "src" = [
+ {
+ "c" = "rut"
+ }
+ ];
+ "value" = "sorokina tatyana"
+ };
+ {
+ "freqs" = {
+ "sum_qf@de" = 3;
+ "sum_qf@en" = 2;
+ "sum_qf@ru" = 11504;
+ "sum_qf@tr" = 35;
+ "sum_qf@uk" = 145;
+ "sum_qf@uz" = 1
+ };
+ "langua" = [
+ "ru"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ };
+ {
+ "c" = "ltr"
+ };
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "сорокина татьяна"
+ };
+ {
+ "langua" = [
+ "ru"
+ ];
+ "predict" = "931";
+ "rank" = 1;
+ "src" = [
+ {
+ "c" = "rut"
+ }
+ ];
+ "value" = "tatiana sorokina"
+ };
+ {
+ "langua" = [
+ "ru"
+ ];
+ "predict" = "951";
+ "rank" = 0;
+ "src" = [
+ {
+ "c" = "rut"
+ }
+ ];
+ "value" = "tatyana sorokina"
+ };
+ {
+ "freqs" = {
+ "SenseRatio" = 0.01;
+ "SenseRatio@de" = 0.5;
+ "SenseRatio@en" = 0.5;
+ "SenseRatio@et" = 0.5;
+ "SenseRatio@fi" = 0.5;
+ "SenseRatio@id" = 0.5;
+ "SenseRatio@kk" = 0.5;
+ "SenseRatio@lt" = 0.5;
+ "SenseRatio@lv" = 0.5;
+ "SenseRatio@pl" = 0.5;
+ "SenseRatio@ru" = 0;
+ "SenseRatio@tr" = 0.5;
+ "SenseRatio@uk" = 0.5;
+ "SenseRatio@uz" = 0.5;
+ "sum_qf@de" = 28;
+ "sum_qf@en" = 8;
+ "sum_qf@ru" = 10060;
+ "sum_qf@tr" = 91;
+ "sum_qf@uk" = 245;
+ "sum_qf@uz" = 6
+ };
+ "langua" = [
+ "ru"
+ ];
+ "src" = [
+ {
+ "c" = "scm";
+ "name" = "bookmate.com"
+ };
+ {
+ "c" = "yam"
+ };
+ {
+ "c" = "ltr"
+ }
+ ];
+ "value" = "татьяна сорокина"
+ }
+ ];
+ "Projects" = [
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 256643
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ };
+ {
+ "Role" = [
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam08459289|Сто дорог, одна – моя]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 54092
+ };
+ "hint_description" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "tr";
+ "by"
+ ];
+ "value" = "2019"
+ }
+ ];
+ "otype" = "Music/Album@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam08459289|Сто дорог, одна – моя]]"
+ };
+ {
+ "Role" = "Author@on";
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm540668de..0|История медицины]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 49611
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm540668de..0|История медицины]]"
+ };
+ {
+ "Role" = "Author@on";
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm-3f1fcad4..0|Мыколка]]"
+ }
+ ];
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm-3f1fcad4..0|Мыколка]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21522
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "hint_description" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "tr";
+ "by"
+ ];
+ "value" = "2015"
+ }
+ ];
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "ltr"
+ };
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#ltr08920335]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrbs464788|Пейп-арт]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 12676
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#lbrbs464788]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrbs137089|Филиальная сеть: развитие и управление]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#lbrbs137089]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb464788|Пейп-арт]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 12676
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb464788|Пейп-арт]]"
+ };
+ {
+ "Role" = [
+ "Artist@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb274279|Что сначала,что потом?]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 15
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb274279|Что сначала,что потом?]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21
+ };
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 1431963
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 34699
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845756|Я – мама]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 441
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845756|Я – мама]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 152646
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 16331217
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 94
+ };
+ "otype" = "Music/Recording@on";
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ };
+ {
+ "Role" = "Author@on";
+ "carousel" = "False";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "report" = "False";
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "SearchRequest" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "by"
+ ];
+ "value" = "Сорокина Татьяна"
+ }
+ ];
+ "Title" = [
+ {
+ "freqs" = {
+ "sum_qf@de" = 3;
+ "sum_qf@en" = 2;
+ "sum_qf@ru" = 11504;
+ "sum_qf@tr" = 35;
+ "sum_qf@uk" = 145;
+ "sum_qf@uz" = 1
+ };
+ "langua" = [
+ "ru"
+ ];
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "Сорокина Татьяна"
+ };
+ {
+ "RelevLocale" = [
+ "kz";
+ "ua";
+ "by";
+ "ru"
+ ];
+ "freqs" = {
+ "sum_qf@de" = 28;
+ "sum_qf@en" = 8;
+ "sum_qf@ru" = 10060;
+ "sum_qf@tr" = 91;
+ "sum_qf@uk" = 245;
+ "sum_qf@uz" = 6
+ };
+ "langua" = [
+ "ru"
+ ];
+ "src" = [
+ {
+ "c" = "scm";
+ "name" = "bookmate.com"
+ };
+ {
+ "c" = "yam"
+ };
+ {
+ "c" = "ltr"
+ }
+ ];
+ "value" = "Татьяна Сорокина"
+ }
+ ];
+ "TopTracks" = [
+ {
+ "Position" = 9;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ };
+ {
+ "Position" = 8;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ };
+ {
+ "Position" = 7;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ };
+ {
+ "Position" = 6;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ };
+ {
+ "Position" = 5;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ };
+ {
+ "Position" = 4;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ };
+ {
+ "Position" = 3;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ };
+ {
+ "Position" = 2;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ };
+ {
+ "Position" = 1;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ };
+ {
+ "Position" = 0;
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ }
+ ];
+ "langua" = [
+ "uk";
+ "ru";
+ "kk";
+ "by"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ }
+ ];
+ "freqs" = {
+ "average_proper_ratio" = [
+ "1.00"
+ ];
+ "proper_ratio" = [
+ {
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "1.00"
+ }
+ ];
+ "sum_qf@de" = [
+ "31"
+ ];
+ "sum_qf@en" = [
+ "10"
+ ];
+ "sum_qf@ru" = [
+ "21572"
+ ];
+ "sum_qf@tr" = [
+ "126"
+ ];
+ "sum_qf@uk" = [
+ "390"
+ ];
+ "sum_qf@uz" = [
+ "7"
+ ]
+ };
+ "fullname" = [
+ {
+ "freqs" = {
+ "sum_qf@de" = 28;
+ "sum_qf@en" = 8;
+ "sum_qf@ru" = 10060;
+ "sum_qf@tr" = 91;
+ "sum_qf@uk" = 245;
+ "sum_qf@uz" = 6
+ };
+ "rfr" = [
+ "[[#rfr21731b2]]"
+ ];
+ "src" = [
+ {
+ "c" = "ltr"
+ }
+ ];
+ "value" = "Татьяна Сорокина"
+ }
+ ];
+ "human_gender" = [
+ {
+ "rfr" = [
+ "[[#rfr21f0d779]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "scm";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "ltr";
+ "is_guessed" = "True"
+ };
+ {
+ "c" = "lbr";
+ "is_guessed" = "True"
+ }
+ ];
+ "value" = "female"
+ }
+ ];
+ "ids" = [
+ {
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "http://music.yandex.ru/artist/7945920"
+ };
+ {
+ "src" = [
+ {
+ "c" = "ltr"
+ }
+ ];
+ "value" = "https://www.litres.ru/4815845"
+ };
+ {
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "http://www.labirint.ru/authors/43298"
+ }
+ ];
+ "isa" = {
+ "Wtype" = "Hum";
+ "otype" = [
+ {
+ "src" = [
+ {
+ "c" = "yam"
+ };
+ {
+ "c" = "scm"
+ };
+ {
+ "c" = "ltr"
+ };
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "Hum"
+ }
+ ]
+ };
+ "merged_ontoids" = [
+ "ltr24815845";
+ "scmbookmatecomh7b363cfd07a49aed419fde3dbd010f64";
+ "lbrh43298";
+ "yam17945920"
+ ];
+ "musical_artist_groups" = [
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 256643
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845750|Наши дети]]"
+ };
+ {
+ "Role" = [
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam08459289|Сто дорог, одна – моя]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 54092
+ };
+ "hint_description" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "tr";
+ "by"
+ ];
+ "value" = "2019"
+ }
+ ];
+ "otype" = "Music/Album@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam08459289|Сто дорог, одна – моя]]"
+ };
+ {
+ "Role" = "Author@on";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm540668de..0|История медицины]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 49611
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm540668de..0|История медицины]]"
+ };
+ {
+ "Role" = "Author@on";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm-3f1fcad4..0|Мыколка]]"
+ }
+ ];
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm-3f1fcad4..0|Мыколка]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21522
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845751|100 дорог]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#ltr08920335|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "hint_description" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "tr";
+ "by"
+ ];
+ "value" = "2015"
+ }
+ ];
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "ltr"
+ };
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#ltr08920335]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrbs464788|Пейп-арт]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 12676
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#lbrbs464788]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrbs137089|Филиальная сеть: развитие и управление]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr"
+ }
+ ];
+ "value" = "[[#lbrbs137089]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb467470|Система дистрибуции. Инструменты создания конкурентного преимущества]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb464788|Пейп-арт]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 12676
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb464788|Пейп-арт]]"
+ };
+ {
+ "Role" = [
+ "Artist@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb274279|Что сначала,что потом?]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 15
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb274279|Что сначала,что потом?]]"
+ };
+ {
+ "Role" = [
+ "Author@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 21
+ };
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "lbr";
+ "f" = "book"
+ }
+ ];
+ "value" = "[[#lbrb137089|Филиальная сеть: развитие и управление]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845752|Храни его]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 1431963
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845753|Удача]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 34699
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845754|Матушка Россия]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845755|Нежданное свидание]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845756|Я – мама]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 441
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845756|Я – мама]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ }
+ ];
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845757|Глупый сон]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 152646
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845760|Спасибо вам]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 16331217
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845758|С Днём рождения]]"
+ };
+ {
+ "Role" = [
+ "Performer@on"
+ ];
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ }
+ ];
+ "freqs" = {
+ "sum_qf" = 94
+ };
+ "otype" = "Music/Recording@on";
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "yam"
+ }
+ ];
+ "value" = "[[#yam356845759|Песенка о мечтах]]"
+ };
+ {
+ "Role" = "Author@on";
+ "formatted" = [
+ {
+ "RelevLocale" = [
+ "ru";
+ "ua";
+ "by";
+ "kz"
+ ];
+ "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ];
+ "rfr" = [
+ "[[#rfr110390d1]]"
+ ];
+ "src" = [
+ {
+ "c" = "scm"
+ }
+ ];
+ "value" = "[[#scm-1eeb2744..0|Система дистрибуции: Инструменты создания конкурентного преимущества]]"
+ }
+ ]
+constexpr auto Steps = 10000U; // Iteration count shared by the TestPerf* cases: loop bound and std::array size.
+ Y_UNIT_TEST(TestValidate) { // IsValidYson must accept the yson fixture and reject every malformed input listed below.
+ UNIT_ASSERT(IsValidYson(yson)); // positive case: the large fixture document
+ UNIT_ASSERT(!IsValidYson("[123}")); // list opened with a bracket but closed with a brace
+ UNIT_ASSERT(!IsValidYson("[123];[456]")); // extra content after the first complete value
+ UNIT_ASSERT(!IsValidYson(R"({"c" = "scm"])"));
+ UNIT_ASSERT(!IsValidYson("")); // empty input
+ UNIT_ASSERT(!IsValidYson(R"({"c";})"));
+ UNIT_ASSERT(!IsValidYson(R"({# = "scm"})"));
+ UNIT_ASSERT(!IsValidYson(R"({'one'= 1})"));
+ }
+ Y_UNIT_TEST(TestPerfValidate) { // Runs IsValidYson on the fixture Steps times and prints the elapsed time.
+ const auto t = TInstant::Now(); // start timestamp
+ for (auto i = 0U; i < Steps; ++i) {
+ UNIT_ASSERT(IsValidYson(yson)); // every iteration must still validate successfully
+ }
+ const auto time = TInstant::Now() - t; // elapsed duration for the whole loop
+ Cerr << "Time is " << time << Endl; // reported only; no threshold is asserted
+ }
+ Y_UNIT_TEST(TestPerfParse) { // Parses the fixture into a DOM Steps times and prints the elapsed time.
+ NMiniKQL::TScopedAlloc alloc(__LOCATION__); // allocator, memory info and holder factory exist only to construct the value builder
+ NMiniKQL::TMemoryUsageInfo memInfo("Memory");
+ NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr);
+ NMiniKQL::TDefaultValueBuilder builder(holderFactory);
+ std::array<NUdf::TUnboxedValue, Steps> v; // one slot per iteration, so every parsed DOM is kept until the test ends
+ const auto t = TInstant::Now();
+ for (auto& i : v) {
+ UNIT_ASSERT(i = TryParseYsonDom(yson, &builder)); // assignment is intentional: store the result, then assert it converts to true
+ }
+ const auto time = TInstant::Now() - t;
+ Cerr << "Time is " << time << Endl; // reported only; no threshold is asserted
+ }
+ Y_UNIT_TEST(TestPerfSerialize) { // Serializes the parsed fixture to binary YSON Steps times and prints the elapsed time.
+ NMiniKQL::TScopedAlloc alloc(__LOCATION__); // allocator, memory info and holder factory exist only to construct the value builder
+ NMiniKQL::TMemoryUsageInfo memInfo("Memory");
+ NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr);
+ NMiniKQL::TDefaultValueBuilder builder(holderFactory);
+ const auto dom = TryParseYsonDom(yson, &builder); // parsed once, outside the timed region
+ std::array<NUdf::TUnboxedValue, Steps> v; // one slot per iteration for the serialized string value
+ const auto t = TInstant::Now();
+ for (auto& i : v) {
+ UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToBinary(dom))); // assignment is intentional: store the result, then assert it converts to true
+ }
+ const auto time = TInstant::Now() - t;
+ Cerr << "Time is " << time << Endl; // reported only; no threshold is asserted
+ }
+ Y_UNIT_TEST(TestPerfSerializeText) { // Serializes the parsed fixture to text YSON Steps times and prints the elapsed time.
+ NMiniKQL::TScopedAlloc alloc(__LOCATION__); // allocator, memory info and holder factory exist only to construct the value builder
+ NMiniKQL::TMemoryUsageInfo memInfo("Memory");
+ NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr);
+ NMiniKQL::TDefaultValueBuilder builder(holderFactory);
+ const auto dom = TryParseYsonDom(yson, &builder); // parsed once, outside the timed region
+ std::array<NUdf::TUnboxedValue, Steps> v; // one slot per iteration for the serialized string value
+ const auto t = TInstant::Now();
+ for (auto& i : v) {
+ UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToText(dom))); // assignment is intentional: store the result, then assert it converts to true
+ }
+ const auto time = TInstant::Now() - t;
+ Cerr << "Time is " << time << Endl; // reported only; no threshold is asserted
+ }
+ Y_UNIT_TEST(TestPerfSerializePrettyText) { // Serializes the parsed fixture to pretty text YSON Steps times and prints the elapsed time.
+ NMiniKQL::TScopedAlloc alloc(__LOCATION__); // allocator, memory info and holder factory exist only to construct the value builder
+ NMiniKQL::TMemoryUsageInfo memInfo("Memory");
+ NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr);
+ NMiniKQL::TDefaultValueBuilder builder(holderFactory);
+ const auto dom = TryParseYsonDom(yson, &builder); // parsed once, outside the timed region
+ std::array<NUdf::TUnboxedValue, Steps> v; // one slot per iteration for the serialized string value
+ const auto t = TInstant::Now();
+ for (auto& i : v) {
+ UNIT_ASSERT(i = builder.NewString(SerializeYsonDomToPrettyText(dom))); // assignment is intentional: store the result, then assert it converts to true
+ }
+ const auto time = TInstant::Now() - t;
+ Cerr << "Time is " << time << Endl; // reported only; no threshold is asserted
+ }
+ Y_UNIT_TEST(TestSerializeJsonNanInf) { // Checks how SerializeJsonDom renders the non-finite doubles %nan, %inf and %-inf.
+ NMiniKQL::TScopedAlloc alloc(__LOCATION__); // allocator, memory info and holder factory exist only to construct the value builder
+ NMiniKQL::TMemoryUsageInfo memInfo("Memory");
+ NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr);
+ NMiniKQL::TDefaultValueBuilder builder(holderFactory);
+ constexpr char yson[] =
+ R"(
+ {
+ "Nan" = %nan;
+ "Inf" = %inf;
+ "NegInf" = %-inf
+ }
+ )";
+ TString expected(R"({"Inf":"inf","Nan":"nan","NegInf":"-inf"})"); // each non-finite value is expected as a JSON string
+ const auto dom = TryParseYsonDom(yson, &builder); // parses the small local document declared above, not the large fixture
+ TString res = SerializeJsonDom(dom, false, true, true); // NOTE(review): meaning of the three boolean flags is not visible here; confirm against the SerializeJsonDom declaration
+ UNIT_ASSERT_EQUAL(expected, res);
+ }
diff --git a/yql/essentials/minikql/dom/ya.make b/yql/essentials/minikql/dom/ya.make
new file mode 100644
index 0000000000..772eb55dc6
--- /dev/null
+++ b/yql/essentials/minikql/dom/ya.make
@@ -0,0 +1,26 @@
+ library/cpp/containers/stack_vector
+ library/cpp/json
+ library/cpp/yson_pull
+ yql/essentials/public/udf
+ yql/essentials/utils
+ node.cpp
+ json.cpp
+ yson.cpp
+ make.cpp
+ peel.cpp
+ hash.cpp
+ ut
diff --git a/yql/essentials/minikql/dom/yson.cpp b/yql/essentials/minikql/dom/yson.cpp
new file mode 100644
index 0000000000..f3ab30f22c
--- /dev/null
+++ b/yql/essentials/minikql/dom/yson.cpp
@@ -0,0 +1,360 @@
+#include "node.h"
+#include "yson.h"
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/yson_pull/exceptions.h>
+#include <library/cpp/yson_pull/reader.h>
+#include <library/cpp/yson_pull/writer.h>
+#include <util/string/builder.h>
+namespace NYql::NDom {
+using namespace NUdf;
+using namespace NYsonPull;
+namespace {
+[[noreturn]] Y_NO_INLINE void UnexpectedEvent(EEventType ev) { // Reports a reader event the parser cannot handle; declared noreturn.
+ UdfTerminate((::TStringBuilder() << "Unexpected event: " << ev).c_str()); // the message names the offending event type
+TUnboxedValuePod ParseScalar(const TScalar& scalar, const IValueBuilder* valueBuilder) { // Converts one yson_pull scalar into the DOM node of the matching type.
+ switch (scalar.Type()) { // one case per EScalarType value handled here; there is no default branch
+ case EScalarType::Entity:
+ return MakeEntity();
+ case EScalarType::Boolean:
+ return MakeBool(scalar.AsBoolean());
+ case EScalarType::Int64:
+ return MakeInt64(scalar.AsInt64());
+ case EScalarType::UInt64:
+ return MakeUint64(scalar.AsUInt64());
+ case EScalarType::Float64:
+ return MakeDouble(scalar.AsFloat64());
+ case EScalarType::String:
+ return MakeString(scalar.AsString(), valueBuilder); // the only case that needs the value builder
+ }
+TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder); // forward declaration: ParseList below calls it before its definition
+TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder); // forward declaration: ParseList below calls it before its definition
+TUnboxedValue ParseList(TReader& reader, const IValueBuilder* valueBuilder) { // Reads list items until EndList and builds a DOM list; called after BeginList was consumed.
+ TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>> items; // accumulates parsed elements in input order
+ for (;;) { // left only by the return on EndList or by UnexpectedEvent
+ const auto& ev = reader.NextEvent();
+ switch (ev.Type()) {
+ case EEventType::BeginList:
+ items.emplace_back(ParseList(reader, valueBuilder)); // nested list: recurse
+ break;
+ case EEventType::EndList:
+ return MakeList(items.data(), items.size(), valueBuilder); // end of this list: hand the collected items to MakeList
+ case EEventType::BeginMap:
+ items.emplace_back(ParseDict(reader, valueBuilder));
+ break;
+ case EEventType::BeginAttributes:
+ items.emplace_back(ParseAttributes(reader, valueBuilder)); // attributed element: attributes plus the value that follows them
+ break;
+ case EEventType::Scalar:
+ items.emplace_back(ParseScalar(ev.AsScalar(), valueBuilder));
+ break;
+ default:
+ UnexpectedEvent(ev.Type()); // any other event terminates via UdfTerminate
+ }
+ }
+TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder) { // Reads key/value pairs until EndMap and builds a DOM dict; called after BeginMap was consumed.
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; // accumulates (key, value) pairs in input order
+ for (;;) { // left only by the return on EndMap or by UnexpectedEvent
+ const auto& evKey = reader.NextEvent();
+ if (evKey.Type() == EEventType::EndMap) {
+ return MakeDict(items.data(), items.size());
+ }
+ Y_ASSERT(evKey.Type() == EEventType::Key); // anything other than EndMap is expected to be a key; checked by assertion only
+ auto key = valueBuilder->NewString(evKey.AsString()); // key is materialized before the next NextEvent call
+ const auto& ev = reader.NextEvent(); // event that starts the value for this key
+ switch (ev.Type()) {
+ case EEventType::BeginList:
+ items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder)));
+ break;
+ case EEventType::BeginMap:
+ items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); // nested map: recurse
+ break;
+ case EEventType::BeginAttributes:
+ items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder)));
+ break;
+ case EEventType::Scalar:
+ items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder)));
+ break;
+ default:
+ UnexpectedEvent(ev.Type()); // any other event terminates via UdfTerminate
+ }
+ }
+TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder); // forward declaration: ParseAttributes below calls it before its definition
+TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder) { // Reads attribute pairs until EndAttributes, then parses the value they annotate and wraps both with MakeAttr.
+ TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; // accumulates (attribute name, attribute value) pairs in input order
+ for (;;) {
+ const auto& evKey = reader.NextEvent();
+ if (evKey.Type() == EEventType::EndAttributes) {
+ break; // attribute section finished; the annotated value is parsed after the loop
+ }
+ Y_ASSERT(evKey.Type() == EEventType::Key); // anything other than EndAttributes is expected to be a key; checked by assertion only
+ auto key = valueBuilder->NewString(evKey.AsString()); // key is materialized before the next NextEvent call
+ const auto& ev = reader.NextEvent(); // event that starts the value for this attribute
+ switch (ev.Type()) {
+ case EEventType::BeginList:
+ items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder)));
+ break;
+ case EEventType::BeginMap:
+ items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder)));
+ break;
+ case EEventType::BeginAttributes:
+ items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); // attribute value that itself carries attributes: recurse
+ break;
+ case EEventType::Scalar:
+ items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder)));
+ break;
+ default:
+ UnexpectedEvent(ev.Type()); // any other event terminates via UdfTerminate
+ }
+ }
+ return MakeAttr(ParseValue(reader, valueBuilder), items.data(), items.size()); // the value following the attributes becomes the wrapped payload
+TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder) { // Reads the next event and parses exactly one value of whatever kind it starts.
+ const auto& ev = reader.NextEvent();
+ switch (ev.Type()) { // dispatch on the kind of value the event opens
+ case EEventType::BeginList:
+ return ParseList(reader, valueBuilder);
+ case EEventType::BeginMap:
+ return ParseDict(reader, valueBuilder);
+ case EEventType::BeginAttributes:
+ return ParseAttributes(reader, valueBuilder);
+ case EEventType::Scalar:
+ return ParseScalar(ev.AsScalar(), valueBuilder);
+ default:
+ UnexpectedEvent(ev.Type()); // any other event terminates via UdfTerminate
+ }
+bool CheckValue(TReader& reader); // forward declaration: CheckDict and CheckAttributes below call it before its definition
+bool CheckDict(TReader& reader) { // Validation counterpart of ParseDict: walks a map without building values and reports whether its structure is acceptable.
+ for (;;) {
+ const auto& evKey = reader.NextEvent();
+ if (evKey.Type() == EEventType::EndMap)
+ return true; // map closed cleanly
+ if (evKey.Type() != EEventType::Key)
+ return false; // unlike ParseDict, a non-key event is rejected at runtime rather than asserted
+ if (CheckValue(reader)) // validate the value that belongs to this key
+ continue;
+ else
+ return false;
+ }
+bool CheckAttributes(TReader& reader) { // Validation counterpart of ParseAttributes: checks the attribute pairs, then the value that follows them.
+ for (;;) {
+ const auto& evKey = reader.NextEvent();
+ if (evKey.Type() == EEventType::EndAttributes)
+ break; // attribute section finished; the annotated value is checked after the loop
+ if (evKey.Type() != EEventType::Key)
+ return false; // a non-key event inside attributes is rejected
+ if (CheckValue(reader)) // validate the value of this attribute
+ continue;
+ else
+ return false;
+ }
+ return CheckValue(reader); // result is decided by the value the attributes annotate
+bool CheckList(TReader& reader) { // Validation counterpart of ParseList: walks list items without building values.
+ for (;;) { // each break below leaves only the switch, so the loop goes on to the next event
+ const auto& ev = reader.NextEvent();
+ switch (ev.Type()) {
+ case EEventType::BeginList:
+ if (CheckList(reader)) // nested list: recurse
+ break;
+ else
+ return false;
+ case EEventType::BeginMap:
+ if (CheckDict(reader))
+ break;
+ else
+ return false;
+ case EEventType::BeginAttributes:
+ if (CheckAttributes(reader))
+ break;
+ else
+ return false;
+ case EEventType::Scalar:
+ break; // scalars need no further inspection here
+ case EEventType::EndList:
+ return true; // list closed cleanly
+ default:
+ return false; // any other event makes the list invalid
+ }
+ }
+bool CheckValue(TReader& reader) { // Validation counterpart of ParseValue: reads the next event and checks exactly one value.
+ const auto& ev = reader.NextEvent();
+ switch (ev.Type()) { // every break falls through to the final return true
+ case EEventType::BeginList:
+ if (CheckList(reader))
+ break;
+ else
+ return false;
+ case EEventType::BeginMap:
+ if (CheckDict(reader))
+ break;
+ else
+ return false;
+ case EEventType::BeginAttributes:
+ if (CheckAttributes(reader))
+ break;
+ else
+ return false;
+ case EEventType::Scalar:
+ break; // scalars need no further inspection here
+ default:
+ return false; // an event that cannot start a value
+ }
+ return true;
+void WriteValue(TWriter& writer, const TUnboxedValue& x) { // Emits the DOM node x, including nested children, as yson_pull writer events.
+ switch (GetNodeType(x)) { // one case per ENodeType handled here
+ case ENodeType::String:
+ writer.String(x.AsStringRef());
+ break;
+ case ENodeType::Bool:
+ writer.Boolean(x.Get<bool>());
+ break;
+ case ENodeType::Int64:
+ writer.Int64(x.Get<i64>());
+ break;
+ case ENodeType::Uint64:
+ writer.UInt64(x.Get<ui64>());
+ break;
+ case ENodeType::Double:
+ writer.Float64(x.Get<double>());
+ break;
+ case ENodeType::Entity:
+ writer.Entity();
+ break;
+ case ENodeType::List:
+ writer.BeginList();
+ if (x.IsBoxed()) { // a non-boxed list node produces no items between BeginList and EndList
+ if (const auto elements = x.GetElements()) { // direct element array available: index it
+ const auto size = x.GetListLength();
+ for (ui64 i = 0; i < size; ++i) {
+ WriteValue(writer, elements[i]);
+ }
+ } else { // no element array: fall back to the list iterator
+ const auto it = x.GetListIterator();
+ for (TUnboxedValue v; it.Next(v); WriteValue(writer, v)) // the write happens in the loop increment expression
+ continue;
+ }
+ }
+ writer.EndList();
+ break;
+ case ENodeType::Dict:
+ writer.BeginMap();
+ if (x.IsBoxed()) { // a non-boxed dict node produces no pairs between BeginMap and EndMap
+ TUnboxedValue key, payload;
+ for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) {
+ writer.Key(key.AsStringRef());
+ WriteValue(writer, payload);
+ }
+ }
+ writer.EndMap();
+ break;
+ case ENodeType::Attr: {
+ writer.BeginAttributes(); // attributes are written first, then the value they annotate
+ TUnboxedValue key, payload;
+ for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) { // the attribute pairs are read through the dict iterator of x
+ writer.Key(key.AsStringRef());
+ WriteValue(writer, payload);
+ }
+ writer.EndAttributes();
+ WriteValue(writer, x.GetVariantItem()); // the annotated value is obtained through GetVariantItem
+ }
+ break;
+ }
+void SerializeYsonDomImpl(const NUdf::TUnboxedValue& dom, TWriter& writer) { // Shared by the three public serializers: wraps one DOM value in BeginStream/EndStream.
+ writer.BeginStream();
+ WriteValue(writer, dom); // the whole tree is written recursively
+ writer.EndStream();
+NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder) { // Parses a single YSON node from memory into a DOM value; nothing is caught here, so reader exceptions reach the caller.
+ auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); // reader is opened in single-node stream mode
+ const auto& begin = reader.NextEvent();
+ Y_ASSERT(begin.Type() == EEventType::BeginStream); // stream framing is checked by assertion only
+ auto value = ParseValue(reader, valueBuilder); // unexpected events inside terminate via UnexpectedEvent
+ const auto& end = reader.NextEvent(); // consume the event after the value
+ Y_ASSERT(end.Type() == EEventType::EndStream); // stream framing is checked by assertion only
+ return value;
+bool IsValidYson(const TStringBuf yson) try { // Function-try-block: returns whether the input is exactly one well-formed YSON node; never builds a DOM.
+ auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); // reader is opened in single-node stream mode
+ const auto& begin = reader.NextEvent();
+ if (begin.Type() != EEventType::BeginStream)
+ return false;
+ if (!CheckValue(reader)) // structural walk over the single top-level value
+ return false;
+ const auto& end = reader.NextEvent();
+ return end.Type() == EEventType::EndStream; // the value must be followed directly by the end of the stream
+} catch (const NException::TBadStream&) { // a TBadStream thrown anywhere in the body is reported as invalid input
+ return false;
+TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom) { // Serializes the DOM value with the binary YSON writer and returns the bytes.
+ TString result;
+ TWriter writer = MakeBinaryWriter(NOutput::FromString(&result), EStreamType::Node); // writer appends into result
+ SerializeYsonDomImpl(dom, writer);
+ return result;
+TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom) { // Serializes the DOM value with the text YSON writer and returns the text.
+ TString result;
+ TWriter writer = MakeTextWriter(NOutput::FromString(&result), EStreamType::Node); // writer appends into result
+ SerializeYsonDomImpl(dom, writer);
+ return result;
+TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom) { // Serializes the DOM value with the pretty text YSON writer and returns the text.
+ TString result;
+ TWriter writer = MakePrettyTextWriter(NOutput::FromString(&result), EStreamType::Node); // writer appends into result
+ SerializeYsonDomImpl(dom, writer);
+ return result;
diff --git a/yql/essentials/minikql/dom/yson.h b/yql/essentials/minikql/dom/yson.h
new file mode 100644
index 0000000000..2fb6ac1ee3
--- /dev/null
+++ b/yql/essentials/minikql/dom/yson.h
@@ -0,0 +1,18 @@
+#pragma once
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+namespace NYql::NDom {
+bool IsValidYson(const TStringBuf yson); // true when the input is one well-formed YSON node; malformed input yields false
+NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder); // parses one YSON node into a DOM value; NOTE(review): despite the Try prefix, the definition does not catch reader exceptions
+TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom); // DOM value to binary YSON
+TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom); // DOM value to text YSON
+TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom); // DOM value to pretty-printed text YSON
diff --git a/yql/essentials/minikql/jsonpath/ast_builder.cpp b/yql/essentials/minikql/jsonpath/ast_builder.cpp
new file mode 100644
index 0000000000..fadf003bfc
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ast_builder.cpp
@@ -0,0 +1,499 @@
+#include "ast_builder.h"
+#include "ast_nodes.h"
+#include "parse_double.h"
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <yql/essentials/ast/yql_ast_escaping.h>
+#include <util/generic/singleton.h>
+#include <util/system/compiler.h>
+#include <util/string/cast.h>
+#include <util/string/builder.h>
+#include <util/charset/utf8.h>
+#include <util/system/cpu_id.h>
+#include <cmath>
+using namespace NYql;
+using namespace NYql::NJsonPath;
+using namespace NJsonPathGenerated;
+using namespace NReWrapper;
+namespace {
+constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID; // regex library id handed to NDispatcher::Compile and exposed through GetReLibId()
+TPosition GetPos(const TToken& token) { // Builds a TPosition from the token's column and line.
+ return TPosition(token.GetColumn(), token.GetLine()); // argument order is (column, line)
+bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) { // Unescapes the quoted literal `str` into `result`; returns false and fills `error` on failure.
+ result.clear();
+ error.clear();
+ const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"'); // NOTE(review): a one-character string consisting of a single quote satisfies both checks
+ const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\'');
+ if (!doubleQuoted && !singleQuoted) {
+ error = "String must be quoted";
+ return false;
+ }
+ if (singleQuoted && onlyDoubleQuoted) { // single quotes are accepted only when the caller passes onlyDoubleQuoted = false
+ error = "Only double quoted strings allowed";
+ return false;
+ }
+ char quoteChar = doubleQuoted ? '"' : '\'';
+ size_t readBytes = 0; // out-parameter required by UnescapeArbitraryAtom; its value is not inspected afterwards
+ TStringBuf atom(str);
+ atom.Skip(1); // drop the opening quote before unescaping
+ TStringOutput sout(result);
+ result.reserve(str.size());
+ auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes); // presumably stops at the closing quoteChar - confirm against UnescapeArbitraryAtom
+ if (unescapeResult == EUnescapeResult::OK) {
+ return true;
+ } else {
+ error = UnescapeResultToString(unescapeResult);
+ return false;
+ }
+TAstBuilder::TAstBuilder(TIssues& issues) // Binds the builder to the issue list that Error() appends to.
+ : Issues(issues)
+void TAstBuilder::Error(TPosition pos, const TStringBuf message) { // Records `message` at `pos` as a JsonPath parse error.
+ Issues.AddIssue(pos, message);
+ Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); // tag the issue that was just added
+TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) { // Builds a {from, to} subscript pair from the rule.
+ TAstNodePtr from = BuildExpr(node.GetRule_expr1());
+ TAstNodePtr to = nullptr; // stays null when the optional second expression (Block2) is absent
+ if (node.HasBlock2()) {
+ to = BuildExpr(node.GetBlock2().GetRule_expr2());
+ }
+ return {from, to};
+// Builds an array access node over `input`. The subscript list consists of the
+// mandatory first subscript followed by every additional subscript in Block3.
+TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) {
+    TVector<TArrayAccessNode::TSubscript> subscripts;
+    subscripts.reserve(1 + node.Block3Size());
+    subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2()));
+    for (size_t i = 0; i < node.Block3Size(); i++) {
+        subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2()));
+    }
+    // The constructor takes the vector by value and the local is not used again,
+    // so hand it over instead of copying every subscript.
+    return new TArrayAccessNode(GetPos(node.GetToken1()), std::move(subscripts), input);
+}
+TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) { // Wraps `input` in a wildcard array access node positioned at the rule's first token.
+ return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input);
+TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) { // Returns the text of the identifier, whichever alternative produced it.
+ switch (node.GetAltCase()) {
+ case TRule_identifier::kAltIdentifier1:
+ return node.GetAlt_identifier1().GetToken1().GetValue(); // plain identifier token
+ case TRule_identifier::kAltIdentifier2:
+ return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue(); // keyword token used as an identifier
+ case TRule_identifier::ALT_NOT_SET:
+ Y_ABORT("Alternative for 'identifier' rule is not set");
+ }
+TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) { // Builds a member access on `input`; the member name is an identifier or a quoted string.
+ TString name;
+ const auto& nameBlock = node.GetBlock2();
+ switch (nameBlock.GetAltCase()) {
+ case TRule_member_accessor_TBlock2::kAlt1:
+ name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1());
+ break;
+ case TRule_member_accessor_TBlock2::kAlt2: {
+ const auto& token = nameBlock.GetAlt2().GetToken1();
+ TString error;
+ if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) { // member names may be single- or double-quoted
+ Error(GetPos(token), error);
+ return nullptr; // the failure has been recorded in Issues
+ }
+ break;
+ }
+ case TRule_member_accessor_TBlock2::ALT_NOT_SET:
+ Y_ABORT("Alternative for 'member_accessor' rule is not set");
+ }
+ return new TMemberAccessNode(GetPos(node.GetToken1()), name, input);
+TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) { // Wraps `input` in a wildcard member access node.
+ const auto& token = node.GetToken2(); // the node position is taken from the rule's second token
+ return new TWildcardMemberAccessNode(GetPos(token), input);
+TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) { // Builds a filter node that applies the rule's predicate expression to `input`.
+ const auto predicate = BuildExpr(node.GetRule_expr3());
+ return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input);
+TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) { // Builds a method call node on `input`, choosing EMethodType from the method name token.
+ const auto& token = node.GetToken2();
+ const auto pos = GetPos(token);
+ const auto& value = token.GetValue();
+ auto type = EMethodType::Double; // kept for any name that matches none of the comparisons below
+ if (value == "abs") {
+ type = EMethodType::Abs;
+ } else if (value == "floor") {
+ type = EMethodType::Floor;
+ } else if (value == "ceiling") {
+ type = EMethodType::Ceiling;
+ } else if (value == "type") {
+ type = EMethodType::Type;
+ } else if (value == "size") {
+ type = EMethodType::Size;
+ } else if (value == "keyvalue") {
+ type = EMethodType::KeyValue;
+ }
+ return new TMethodCallNode(pos, type, input);
+TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) { // Dispatches one accessor operation to the matching Build* method, passing `input` through.
+ switch (node.GetAltCase()) {
+ case TRule_accessor_op::kAltAccessorOp1:
+ return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input);
+ case TRule_accessor_op::kAltAccessorOp2:
+ return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input);
+ case TRule_accessor_op::kAltAccessorOp3:
+ return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input);
+ case TRule_accessor_op::kAltAccessorOp4:
+ return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input);
+ case TRule_accessor_op::kAltAccessorOp5:
+ return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input);
+ case TRule_accessor_op::kAltAccessorOp6:
+ return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input);
+ case TRule_accessor_op::ALT_NOT_SET:
+ Y_ABORT("Alternative for 'accessor_op' rule is not set");
+ }
+TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) { // Builds the AST node for a primary expression; returns nullptr after recording an error in Issues.
+ switch (node.GetAltCase()) {
+ case TRule_primary::kAltPrimary1: { // number literal
+ const auto& token = node.GetAlt_primary1().GetToken1();
+ const auto& numberString = token.GetValue();
+ const double parsedValue = ParseDouble(numberString);
+ if (Y_UNLIKELY(std::isnan(parsedValue))) { // NaN is treated as a grammar bug, not as a user error
+ Y_ABORT("Invalid number was allowed by JsonPath grammar");
+ }
+ if (Y_UNLIKELY(std::isinf(parsedValue))) { // an infinite parse result is reported as a user-facing error
+ Error(GetPos(token), "Number literal is infinity");
+ return nullptr;
+ }
+ return new TNumberLiteralNode(GetPos(token), parsedValue);
+ }
+ case TRule_primary::kAltPrimary2: { // context object
+ const auto& token = node.GetAlt_primary2().GetToken1();
+ return new TContextObjectNode(GetPos(token));
+ }
+ case TRule_primary::kAltPrimary3: { // last array index
+ const auto& token = node.GetAlt_primary3().GetToken1();
+ return new TLastArrayIndexNode(GetPos(token));
+ }
+ case TRule_primary::kAltPrimary4: { // nested expression, optionally wrapped in an is-unknown predicate
+ const auto& primary = node.GetAlt_primary4().GetBlock1();
+ const auto input = BuildExpr(primary.GetRule_expr2());
+ if (primary.HasBlock4()) {
+ const auto& token = primary.GetBlock4().GetToken1();
+ return new TIsUnknownPredicateNode(GetPos(token), input);
+ }
+ return input;
+ }
+ case TRule_primary::kAltPrimary5: { // variable reference
+ const auto& token = node.GetAlt_primary5().GetToken1();
+ return new TVariableNode(GetPos(token), token.GetValue().substr(1)); // drops the token's first character (presumably the variable sigil - confirm against the grammar)
+ }
+ case TRule_primary::kAltPrimary6: { // boolean literal true
+ const auto& token = node.GetAlt_primary6().GetToken1();
+ return new TBooleanLiteralNode(GetPos(token), true);
+ }
+ case TRule_primary::kAltPrimary7: { // boolean literal false
+ const auto& token = node.GetAlt_primary7().GetToken1();
+ return new TBooleanLiteralNode(GetPos(token), false);
+ }
+ case TRule_primary::kAltPrimary8: { // null literal
+ const auto& token = node.GetAlt_primary8().GetToken1();
+ return new TNullLiteralNode(GetPos(token));
+ }
+ case TRule_primary::kAltPrimary9: { // string literal
+ const auto& token = node.GetAlt_primary9().GetToken1();
+ TString value;
+ TString error;
+ if (!TryStringContent(token.GetValue(), value, error)) { // default onlyDoubleQuoted = true: single-quoted literals are rejected here
+ Error(GetPos(token), error);
+ return nullptr;
+ }
+ return new TStringLiteralNode(GetPos(token), value);
+ }
+ case TRule_primary::kAltPrimary10: { // filter object
+ const auto& token = node.GetAlt_primary10().GetToken1();
+ return new TFilterObjectNode(GetPos(token));
+ }
+ case TRule_primary::ALT_NOT_SET:
+ Y_ABORT("Alternative for 'primary' rule is not set");
+ }
+TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) { // Builds the primary, then applies each accessor op in order with the previous result as its input.
+ TAstNodePtr input = BuildPrimary(node.GetRule_primary1()); // NOTE(review): a nullptr from BuildPrimary is passed on to the accessor ops unchecked
+ for (size_t i = 0; i < node.Block2Size(); i++) {
+ input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input);
+ }
+ return input;
+TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) { // A plain expression is built from its single accessor expression.
+ return BuildAccessorExpr(node.GetRule_accessor_expr1());
+TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) { // Builds a like_regex predicate on `input`: unquotes the pattern, parses optional flags, compiles the regex.
+ const auto& regexToken = node.GetToken2();
+ TString regex;
+ TString error;
+ if (!TryStringContent(regexToken.GetValue(), regex, error)) {
+ Error(GetPos(regexToken), error);
+ return nullptr;
+ }
+ ui32 parsedFlags = 0;
+ if (node.HasBlock3()) { // optional flags string
+ TString flags;
+ const auto& flagsToken = node.GetBlock3().GetToken2();
+ if (!TryStringContent(flagsToken.GetValue(), flags, error)) {
+ Error(GetPos(flagsToken), error);
+ return nullptr;
+ }
+ for (char flag : flags) {
+ switch (flag) {
+ case 'i': // the only flag handled here
+ parsedFlags |= FLAGS_CASELESS;
+ break;
+ default:
+ Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'");
+ break; // NOTE(review): the error is recorded but building continues, so a node is still returned unless compilation fails
+ }
+ }
+ }
+ IRePtr compiledRegex;
+ try {
+ compiledRegex = NDispatcher::Compile(regex, parsedFlags, RegexpLibId);
+ } catch (const NReWrapper::TCompileException& e) { // a compile failure becomes a parse issue at the pattern token
+ Error(GetPos(regexToken), e.AsStrBuf());
+ return nullptr;
+ }
+ return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex));
+TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) { // Builds a plain expression with an optional starts-with / like_regex suffix, or an exists predicate.
+ switch (node.GetAltCase()) {
+ case TRule_predicate_expr::kAltPredicateExpr1: {
+ const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1();
+ const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1());
+ if (!predicate.HasBlock2()) { // no predicate suffix: the plain expression is the result
+ return input;
+ }
+ const auto& block = predicate.GetBlock2();
+ switch (block.GetAltCase()) { // every alternative below returns or aborts
+ case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: {
+ const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1();
+ const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3());
+ return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix);
+ }
+ case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: {
+ return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input);
+ }
+ case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET:
+ Y_ABORT("Alternative for inner block of 'predicate_expr' rule is not set");
+ }
+ }
+ case TRule_predicate_expr::kAltPredicateExpr2: { // exists predicate
+ const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1();
+ const auto input = BuildExpr(predicate.GetRule_expr3());
+ return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input);
+ }
+ case TRule_predicate_expr::ALT_NOT_SET:
+ Y_ABORT("Alternative for 'predicate' rule is not set");
+ }
+TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) { // Builds the predicate expression and wraps it in a unary operation when an operator token is present.
+ const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2());
+ if (!node.HasBlock1()) { // no unary operator
+ return predicateExpr;
+ }
+ const auto& opToken = node.GetBlock1().GetToken1();
+ const auto& opValue = opToken.GetValue();
+ auto operation = EUnaryOperation::Plus; // kept for any operator text other than "-" and "!"
+ if (opValue == "-") {
+ operation = EUnaryOperation::Minus;
+ } else if (opValue == "!") {
+ operation = EUnaryOperation::Not;
+ }
+ return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr);
+TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) { // Folds a chain of multiply / divide / modulo operands into nested binary nodes, left to right.
+ TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1());
+ for (size_t i = 0; i < node.Block2Size(); i++) {
+ const auto& block = node.GetBlock2(i);
+ const auto& opToken = block.GetToken1();
+ const auto& opValue = opToken.GetValue();
+ auto operation = EBinaryOperation::Multiply; // kept for any operator text other than "/" and "%"
+ if (opValue == "/") {
+ operation = EBinaryOperation::Divide;
+ } else if (opValue == "%") {
+ operation = EBinaryOperation::Modulo;
+ }
+ const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); // the accumulated tree becomes the left operand
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) { // Folds a chain of add / subtract operands into nested binary nodes, left to right.
+ TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1());
+ for (size_t i = 0; i < node.Block2Size(); i++) {
+ const auto& block = node.GetBlock2(i);
+ const auto& opToken = block.GetToken1();
+ auto operation = EBinaryOperation::Add; // kept for any operator text other than "-"
+ if (opToken.GetValue() == "-") {
+ operation = EBinaryOperation::Substract;
+ }
+ const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); // the accumulated tree becomes the left operand
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) { // Builds an additive expression with at most one ordering comparison against a second additive expression.
+ TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1());
+ if (node.HasBlock2()) { // a single optional comparison, not a chain
+ const auto& block = node.GetBlock2();
+ const auto& opToken = block.GetToken1();
+ const auto& opValue = opToken.GetValue();
+ auto operation = EBinaryOperation::Less; // kept for any operator text not matched below
+ if (opValue == "<=") {
+ operation = EBinaryOperation::LessEqual;
+ } else if (opValue == ">") {
+ operation = EBinaryOperation::Greater;
+ } else if (opValue == ">=") {
+ operation = EBinaryOperation::GreaterEqual;
+ }
+ const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) { // Builds a comparison expression with at most one equality / inequality test against a second one.
+ TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1());
+ if (node.HasBlock2()) { // a single optional test, not a chain
+ const auto& block = node.GetBlock2();
+ const auto& opToken = block.GetToken1();
+ const auto& opValue = opToken.GetValue();
+ auto operation = EBinaryOperation::Equal; // kept for any operator text other than the two inequality spellings
+ if (opValue == "<>" || opValue == "!=") {
+ operation = EBinaryOperation::NotEqual;
+ }
+ const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) { // Folds a chain of equality expressions into nested And nodes, left to right.
+ TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1());
+ for (size_t i = 0; i < node.Block2Size(); i++) {
+ const auto& block = node.GetBlock2(i);
+ const auto& opToken = block.GetToken1(); // used only for the node position
+ const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand);
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) { // Folds a chain of And expressions into nested Or nodes, left to right.
+ TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1());
+ for (size_t i = 0; i < node.Block2Size(); i++) {
+ const auto& block = node.GetBlock2(i);
+ const auto& opToken = block.GetToken1(); // used only for the node position
+ const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2());
+ result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand);
+ }
+ return result;
+TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) { // Entry point for any expression: delegates to the Or level of the precedence chain.
+ return BuildOrExpr(node.GetRule_or_expr1());
+TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) { // Builds the root node: optional mode token followed by the expression.
+ TPosition pos; // stays default-constructed when no mode token is present
+ auto mode = EJsonPathMode::Lax; // used when the mode is absent or is anything other than "strict"
+ if (node.HasBlock1()) {
+ const auto& modeToken = node.GetBlock1().GetToken1();
+ pos = GetPos(modeToken);
+ if (modeToken.GetValue() == "strict") {
+ mode = EJsonPathMode::Strict;
+ }
+ }
+ const auto expr = BuildExpr(node.GetRule_expr2()); // NOTE(review): may be nullptr after a reported error; it is stored in the root as is
+ return new TRootNode(pos, expr, mode);
+TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) { // Public entry point: builds the AST for the parser output's jsonpath rule.
+ return BuildJsonPath(ast.GetRule_jsonpath());
+namespace NYql::NJsonPath {
+ui32 GetReLibId() { // Exposes the regex library id that this file passes to NDispatcher::Compile.
+ return RegexpLibId;
diff --git a/yql/essentials/minikql/jsonpath/ast_builder.h b/yql/essentials/minikql/jsonpath/ast_builder.h
new file mode 100644
index 0000000000..66a47483b3
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ast_builder.h
@@ -0,0 +1,52 @@
+#pragma once
+#include "ast_nodes.h"
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h>
+namespace NYql::NJsonPath {
+class TAstBuilder { // Converts the generated JsonPath parser AST into TAstNodePtr trees, reporting problems into a TIssues list. NOTE(review): access specifiers are not visible in this rendering.
+ TAstBuilder(TIssues& issues); // `issues` is held by reference (see the Issues member)
+ TAstNodePtr Build(const NJsonPathGenerated::TJsonPathParserAST& ast); // builds the tree for the whole parsed path
+ TArrayAccessNode::TSubscript BuildArraySubscript(const NJsonPathGenerated::TRule_array_subscript& node);
+ TAstNodePtr BuildArrayAccessor(const NJsonPathGenerated::TRule_array_accessor& node, TAstNodePtr input); // `input` on the accessor builders is the node the accessor is applied to
+ TAstNodePtr BuildWildcardArrayAccessor(const NJsonPathGenerated::TRule_wildcard_array_accessor& node, TAstNodePtr input);
+ TString BuildIdentifier(const NJsonPathGenerated::TRule_identifier& node); // returns the identifier text rather than a node
+ TAstNodePtr BuildMemberAccessor(const NJsonPathGenerated::TRule_member_accessor& node, TAstNodePtr input);
+ TAstNodePtr BuildWildcardMemberAccessor(const NJsonPathGenerated::TRule_wildcard_member_accessor& node, TAstNodePtr input);
+ TAstNodePtr BuildFilter(const NJsonPathGenerated::TRule_filter& node, TAstNodePtr input);
+ TAstNodePtr BuildMethod(const NJsonPathGenerated::TRule_method& node, TAstNodePtr input);
+ TAstNodePtr BuildAccessorOp(const NJsonPathGenerated::TRule_accessor_op& node, TAstNodePtr input);
+ TAstNodePtr BuildAccessorExpr(const NJsonPathGenerated::TRule_accessor_expr& node);
+ TAstNodePtr BuildPrimary(const NJsonPathGenerated::TRule_primary& node);
+ TAstNodePtr BuildPlainExpr(const NJsonPathGenerated::TRule_plain_expr& node);
+ TAstNodePtr BuildLikeRegexExpr(const NJsonPathGenerated::TRule_like_regex_expr& node, TAstNodePtr input);
+ TAstNodePtr BuildPredicateExpr(const NJsonPathGenerated::TRule_predicate_expr& node);
+ TAstNodePtr BuildUnaryExpr(const NJsonPathGenerated::TRule_unary_expr& node);
+ TAstNodePtr BuildMulExpr(const NJsonPathGenerated::TRule_mul_expr& node);
+ TAstNodePtr BuildAddExpr(const NJsonPathGenerated::TRule_add_expr& node);
+ TAstNodePtr BuildCompareExpr(const NJsonPathGenerated::TRule_compare_expr& node);
+ TAstNodePtr BuildEqualExpr(const NJsonPathGenerated::TRule_equal_expr& node);
+ TAstNodePtr BuildAndExpr(const NJsonPathGenerated::TRule_and_expr& node);
+ TAstNodePtr BuildOrExpr(const NJsonPathGenerated::TRule_or_expr& node);
+ TAstNodePtr BuildExpr(const NJsonPathGenerated::TRule_expr& node);
+ TAstNodePtr BuildJsonPath(const NJsonPathGenerated::TRule_jsonpath& node);
+ void Error(TPosition pos, const TStringBuf message); // appends a JSONPATH_PARSE_ERROR issue
+ TIssues& Issues; // reference member: the issue list must outlive the builder
diff --git a/yql/essentials/minikql/jsonpath/ast_nodes.cpp b/yql/essentials/minikql/jsonpath/ast_nodes.cpp
new file mode 100644
index 0000000000..5a51c2e90e
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ast_nodes.cpp
@@ -0,0 +1,383 @@
+#include "ast_nodes.h"
+namespace NYql::NJsonPath {
+TAstNode::TAstNode(TPosition pos) // Base node: stores the source position.
+ : Pos(pos)
+TPosition TAstNode::GetPos() const { // Returns the stored source position.
+ return Pos;
+EReturnType TAstNode::GetReturnType() const { // Default return type; several node classes in this file provide their own GetReturnType.
+ return EReturnType::Any;
+TRootNode::TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode) // Root of a JsonPath AST: the top-level expression plus the mode.
+ : TAstNode(pos)
+ , Expr(expr)
+ , Mode(mode)
+const TAstNodePtr TRootNode::GetExpr() const { // Returns the top-level expression.
+ return Expr;
+EJsonPathMode TRootNode::GetMode() const { // Returns the mode given at construction.
+ return Mode;
+void TRootNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitRoot(*this);
+// Reports the return type of the wrapped top-level expression.
+// The AST builder stores whatever BuildExpr returned, which is nullptr after a
+// reported parse error, so fall back to EReturnType::Any (the TAstNode default)
+// instead of dereferencing a null expression.
+EReturnType TRootNode::GetReturnType() const {
+    return Expr ? Expr->GetReturnType() : EReturnType::Any;
+}
+TContextObjectNode::TContextObjectNode(TPosition pos) // Context object node; carries only its position.
+ : TAstNode(pos)
+void TContextObjectNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitContextObject(*this);
+TVariableNode::TVariableNode(TPosition pos, const TString& name) // Variable reference node; stores a copy of `name`.
+ : TAstNode(pos)
+ , Name(name)
+const TString& TVariableNode::GetName() const { // Returns the stored variable name.
+ return Name;
+void TVariableNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ visitor.VisitVariable(*this);
+TLastArrayIndexNode::TLastArrayIndexNode(TPosition pos) // Last-array-index node; carries only its position.
+ : TAstNode(pos)
+void TLastArrayIndexNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ visitor.VisitLastArrayIndex(*this);
+TNumberLiteralNode::TNumberLiteralNode(TPosition pos, double value) // Number literal node holding a double.
+ : TAstNode(pos)
+ , Value(value)
+double TNumberLiteralNode::GetValue() const { // Returns the literal value.
+ return Value;
+void TNumberLiteralNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitNumberLiteral(*this);
+TMemberAccessNode::TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input) // Access to the member named `member` on the result of `input`.
+ : TAstNode(pos)
+ , Member(member)
+ , Input(input)
+const TStringBuf TMemberAccessNode::GetMember() const { // Returns a TStringBuf built from the Member field.
+ return Member;
+const TAstNodePtr TMemberAccessNode::GetInput() const { // Returns the node the access is applied to.
+ return Input;
+void TMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitMemberAccess(*this);
+TWildcardMemberAccessNode::TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input) // Wildcard member access applied to `input`.
+ : TAstNode(pos)
+ , Input(input)
+const TAstNodePtr TWildcardMemberAccessNode::GetInput() const { // Returns the node the access is applied to.
+ return Input;
+void TWildcardMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitWildcardMemberAccess(*this);
+// Array access applied to `input`, selecting elements by the given subscripts.
+TArrayAccessNode::TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input)
+    : TAstNode(pos)
+    // `subscripts` is already this constructor's own copy (by-value parameter),
+    // so move it into the member instead of copying the vector a second time.
+    , Subscripts(std::move(subscripts))
+    , Input(input)
+{
+}
+const TVector<TArrayAccessNode::TSubscript>& TArrayAccessNode::GetSubscripts() const { // Returns the stored subscripts by const reference.
+ return Subscripts;
+const TAstNodePtr TArrayAccessNode::GetInput() const { // Returns the node the access is applied to.
+ return Input;
+void TArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitArrayAccess(*this);
+TWildcardArrayAccessNode::TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input) // Wildcard array access applied to `input`.
+ : TAstNode(pos)
+ , Input(input)
+const TAstNodePtr TWildcardArrayAccessNode::GetInput() const { // Returns the node the access is applied to.
+ return Input;
+void TWildcardArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitWildcardArrayAccess(*this);
+TUnaryOperationNode::TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr) // Unary operation `op` applied to `expr`.
+ : TAstNode(pos)
+ , Operation(op)
+ , Expr(expr)
+EUnaryOperation TUnaryOperationNode::GetOp() const { // Returns the operation kind.
+ return Operation;
+const TAstNodePtr TUnaryOperationNode::GetExpr() const { // Returns the operand.
+ return Expr;
+void TUnaryOperationNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitUnaryOperation(*this);
+EReturnType TUnaryOperationNode::GetReturnType() const { // Not yields Bool; every other unary operation yields Any.
+ return Operation == EUnaryOperation::Not ? EReturnType::Bool : EReturnType::Any;
+TBinaryOperationNode::TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr) // Binary operation `op` over a left and a right operand.
+ : TAstNode(pos)
+ , Operation(op)
+ , LeftExpr(leftExpr)
+ , RightExpr(rightExpr)
+EBinaryOperation TBinaryOperationNode::GetOp() const { // Returns the operation kind.
+ return Operation;
+const TAstNodePtr TBinaryOperationNode::GetLeftExpr() const { // Returns the left operand.
+ return LeftExpr;
+const TAstNodePtr TBinaryOperationNode::GetRightExpr() const { // Returns the right operand.
+ return RightExpr;
+void TBinaryOperationNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitBinaryOperation(*this);
+EReturnType TBinaryOperationNode::GetReturnType() const { // Comparison and logical operations yield Bool; all other operations yield Any.
+ switch (Operation) {
+ case EBinaryOperation::Less:
+ case EBinaryOperation::LessEqual:
+ case EBinaryOperation::Greater:
+ case EBinaryOperation::GreaterEqual:
+ case EBinaryOperation::Equal:
+ case EBinaryOperation::NotEqual:
+ case EBinaryOperation::And:
+ case EBinaryOperation::Or:
+ return EReturnType::Bool;
+ default: // every operation not listed above
+ return EReturnType::Any;
+ }
+TBooleanLiteralNode::TBooleanLiteralNode(TPosition pos, bool value) // Boolean literal node.
+ : TAstNode(pos)
+ , Value(value)
+bool TBooleanLiteralNode::GetValue() const { // Returns the literal value.
+ return Value;
+void TBooleanLiteralNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitBooleanLiteral(*this);
+TNullLiteralNode::TNullLiteralNode(TPosition pos) // Null literal node; carries only its position.
+ : TAstNode(pos)
+void TNullLiteralNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitNullLiteral(*this);
+TStringLiteralNode::TStringLiteralNode(TPosition pos, const TString& value) // String literal node; stores a copy of `value`.
+ : TAstNode(pos)
+ , Value(value)
+const TString& TStringLiteralNode::GetValue() const { // Returns the stored string.
+ return Value;
+void TStringLiteralNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitStringLiteral(*this);
+TFilterObjectNode::TFilterObjectNode(TPosition pos) // Filter object node; carries only its position.
+ : TAstNode(pos)
+void TFilterObjectNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitFilterObject(*this);
+TFilterPredicateNode::TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input) // Filter node pairing a predicate expression with the input it filters.
+ : TAstNode(pos)
+ , Predicate(predicate)
+ , Input(input)
+const TAstNodePtr TFilterPredicateNode::GetPredicate() const { // Returns the predicate expression.
+ return Predicate;
+const TAstNodePtr TFilterPredicateNode::GetInput() const { // Returns the node being filtered.
+ return Input;
+void TFilterPredicateNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitFilterPredicate(*this);
+TMethodCallNode::TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input) // Method call of kind `type` applied to `input`.
+ : TAstNode(pos)
+ , Type(type)
+ , Input(input)
+EMethodType TMethodCallNode::GetType() const { // Returns the method kind.
+ return Type;
+const TAstNodePtr TMethodCallNode::GetInput() const { // Returns the node the method is applied to.
+ return Input;
+void TMethodCallNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitMethodCall(*this);
+TStartsWithPredicateNode::TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix) // Starts-with predicate over an input expression and a prefix expression.
+ : TAstNode(pos)
+ , Input(input)
+ , Prefix(prefix)
+const TAstNodePtr TStartsWithPredicateNode::GetInput() const { // Returns the input expression.
+ return Input;
+const TAstNodePtr TStartsWithPredicateNode::GetPrefix() const { // Returns the prefix expression.
+ return Prefix;
+EReturnType TStartsWithPredicateNode::GetReturnType() const { // Always Bool.
+ return EReturnType::Bool;
+void TStartsWithPredicateNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitStartsWithPredicate(*this);
+TExistsPredicateNode::TExistsPredicateNode(TPosition pos, TAstNodePtr input) // Exists predicate over `input`.
+ : TAstNode(pos)
+ , Input(input)
+const TAstNodePtr TExistsPredicateNode::GetInput() const { // Returns the input expression.
+ return Input;
+EReturnType TExistsPredicateNode::GetReturnType() const { // Always Bool.
+ return EReturnType::Bool;
+void TExistsPredicateNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitExistsPredicate(*this);
+TIsUnknownPredicateNode::TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input) // Is-unknown predicate over `input`.
+ : TAstNode(pos)
+ , Input(input)
+const TAstNodePtr TIsUnknownPredicateNode::GetInput() const { // Returns the input expression.
+ return Input;
+EReturnType TIsUnknownPredicateNode::GetReturnType() const { // Always Bool.
+ return EReturnType::Bool;
+void TIsUnknownPredicateNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitIsUnknownPredicate(*this);
+TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex) // like_regex predicate: takes ownership of the already compiled regex.
+ : TAstNode(pos)
+ , Input(input)
+ , Regex(std::move(regex))
+const TAstNodePtr TLikeRegexPredicateNode::GetInput() const { // Returns the input expression.
+ return Input;
+const NReWrapper::IRePtr& TLikeRegexPredicateNode::GetRegex() const { // Returns the compiled regex by const reference.
+ return Regex;
+EReturnType TLikeRegexPredicateNode::GetReturnType() const { // Always Bool.
+ return EReturnType::Bool;
+void TLikeRegexPredicateNode::Accept(IAstNodeVisitor& visitor) const { // Visitor dispatch.
+ return visitor.VisitLikeRegexPredicate(*this);
diff --git a/yql/essentials/minikql/jsonpath/ast_nodes.h b/yql/essentials/minikql/jsonpath/ast_nodes.h
new file mode 100644
index 0000000000..6ccb8a56ea
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ast_nodes.h
@@ -0,0 +1,401 @@
+#pragma once
+#include <yql/essentials/public/issue/yql_issue.h>
+#include <library/cpp/json/json_value.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+namespace NYql::NJsonPath {
+class TRootNode;
+class TContextObjectNode;
+class TVariableNode;
+class TLastArrayIndexNode;
+class TNumberLiteralNode;
+class TAccessorExprNode;
+class TMemberAccessNode;
+class TWildcardMemberAccessNode;
+class TArrayAccessNode;
+class TWildcardArrayAccessNode;
+class TUnaryOperationNode;
+class TBinaryOperationNode;
+class TBooleanLiteralNode;
+class TNullLiteralNode;
+class TStringLiteralNode;
+class TFilterObjectNode;
+class TFilterPredicateNode;
+class TMethodCallNode;
+class TStartsWithPredicateNode;
+class TExistsPredicateNode;
+class TIsUnknownPredicateNode;
+class TLikeRegexPredicateNode;
+// Jsonpath execution mode carried by the root node (TRootNode::GetMode).
+// TJsonPathBuilder::WriteMode stores the numeric value as a TUint in the binary form,
+// so the values presumably must stay stable if that form is ever persisted.
+enum class EJsonPathMode {
+ Lax = 0,
+ Strict = 1,
+// Visitor interface over the jsonpath AST: one pure-virtual callback per concrete node type.
+// Each node's Accept() calls the callback matching its own type.
+class IAstNodeVisitor {
+ virtual void VisitRoot(const TRootNode& node) = 0;
+ virtual void VisitContextObject(const TContextObjectNode& node) = 0;
+ virtual void VisitVariable(const TVariableNode& node) = 0;
+ virtual void VisitLastArrayIndex(const TLastArrayIndexNode& node) = 0;
+ virtual void VisitNumberLiteral(const TNumberLiteralNode& node) = 0;
+ virtual void VisitMemberAccess(const TMemberAccessNode& node) = 0;
+ virtual void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) = 0;
+ virtual void VisitArrayAccess(const TArrayAccessNode& node) = 0;
+ virtual void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) = 0;
+ virtual void VisitUnaryOperation(const TUnaryOperationNode& node) = 0;
+ virtual void VisitBinaryOperation(const TBinaryOperationNode& node) = 0;
+ virtual void VisitBooleanLiteral(const TBooleanLiteralNode& node) = 0;
+ virtual void VisitNullLiteral(const TNullLiteralNode& node) = 0;
+ virtual void VisitStringLiteral(const TStringLiteralNode& node) = 0;
+ virtual void VisitFilterObject(const TFilterObjectNode& node) = 0;
+ virtual void VisitFilterPredicate(const TFilterPredicateNode& node) = 0;
+ virtual void VisitMethodCall(const TMethodCallNode& node) = 0;
+ virtual void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) = 0;
+ virtual void VisitExistsPredicate(const TExistsPredicateNode& node) = 0;
+ virtual void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) = 0;
+ virtual void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) = 0;
+ // Virtual destructor so implementations can be destroyed through the interface.
+ virtual ~IAstNodeVisitor() = default;
+// Result kind reported by TAstNode::GetReturnType().
+// The starts-with, exists, is-unknown and like_regex predicate nodes return Bool.
+enum class EReturnType {
+ Any = 0,
+ Bool = 1,
+// Base class of every jsonpath AST node. Reference-counted via TSimpleRefCount and
+// handled through TAstNodePtr (TIntrusivePtr<TAstNode>).
+class TAstNode : public TSimpleRefCount<TAstNode> {
+ explicit TAstNode(TPosition pos);
+ // Position of this node in the source jsonpath text.
+ TPosition GetPos() const;
+ // Visitor dispatch; every concrete node must implement it.
+ virtual void Accept(IAstNodeVisitor& visitor) const = 0;
+ // Result kind of the node; has a base implementation that subclasses may override.
+ virtual EReturnType GetReturnType() const;
+ virtual ~TAstNode() = default;
+ TPosition Pos;
+using TAstNodePtr = TIntrusivePtr<TAstNode>;
+// Root of the AST: holds the top-level expression together with the execution mode.
+class TRootNode : public TAstNode {
+ TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode);
+ const TAstNodePtr GetExpr() const;
+ EJsonPathMode GetMode() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ EReturnType GetReturnType() const override;
+ TAstNodePtr Expr;
+ EJsonPathMode Mode;
+// Context object node. Carries no data beyond its source position.
+class TContextObjectNode : public TAstNode {
+ explicit TContextObjectNode(TPosition pos);
+ void Accept(IAstNodeVisitor& visitor) const override;
+// Variable reference node; stores the variable name.
+class TVariableNode : public TAstNode {
+ TVariableNode(TPosition pos, const TString& name);
+ const TString& GetName() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TString Name;
+// "Last array index" node. Carries no data beyond its source position.
+class TLastArrayIndexNode : public TAstNode {
+ explicit TLastArrayIndexNode(TPosition pos);
+ void Accept(IAstNodeVisitor& visitor) const override;
+// Numeric literal node; the value is stored as a double.
+class TNumberLiteralNode : public TAstNode {
+ TNumberLiteralNode(TPosition pos, double value);
+ double GetValue() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ double Value;
+// Member access node: a member name applied to an input expression.
+class TMemberAccessNode : public TAstNode {
+ TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input);
+ // Returns the member name as a TStringBuf.
+ const TStringBuf GetMember() const;
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TString Member;
+ TAstNodePtr Input;
+// Wildcard member access node; has a single input expression and no member name.
+class TWildcardMemberAccessNode : public TAstNode {
+ TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input);
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+// Array access node: a list of subscripts applied to an input expression.
+class TArrayAccessNode : public TAstNode {
+ // One subscript. From is always visited by TJsonPathBuilder::VisitArrayAccess;
+ // To is visited only when non-null, i.e. when the subscript is a range.
+ struct TSubscript {
+ TAstNodePtr From;
+ TAstNodePtr To;
+ };
+ TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input);
+ const TVector<TSubscript>& GetSubscripts() const;
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TVector<TSubscript> Subscripts;
+ TAstNodePtr Input;
+// Wildcard array access node; has a single input expression and no subscripts.
+class TWildcardArrayAccessNode : public TAstNode {
+ TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input);
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+// Unary operators of the AST. TJsonPathBuilder::VisitUnaryOperation maps them to
+// EJsonPathItemType::UnaryPlus / UnaryMinus / UnaryNot.
+enum class EUnaryOperation {
+ Plus = 0,
+ Minus = 1,
+ Not = 2,
+// Unary operation node: an operator applied to a single operand expression.
+class TUnaryOperationNode : public TAstNode {
+ TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr);
+ EUnaryOperation GetOp() const;
+ const TAstNodePtr GetExpr() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ EReturnType GetReturnType() const override;
+ EUnaryOperation Operation;
+ TAstNodePtr Expr;
+// Binary operators of the AST. TJsonPathBuilder::VisitBinaryOperation maps each one to
+// the corresponding EJsonPathItemType::Binary* value.
+// NOTE: "Substract" is the spelling used throughout this code; renaming it would
+// touch every user of the enumerator.
+enum class EBinaryOperation {
+ Add = 0,
+ Substract = 1,
+ Multiply = 2,
+ Divide = 3,
+ Modulo = 4,
+ Less = 5,
+ LessEqual = 6,
+ Greater = 7,
+ GreaterEqual = 8,
+ Equal = 9,
+ NotEqual = 10,
+ And = 11,
+ Or = 12,
+// Binary operation node: an operator with a left and a right operand expression.
+class TBinaryOperationNode : public TAstNode {
+ TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr);
+ EBinaryOperation GetOp() const;
+ const TAstNodePtr GetLeftExpr() const;
+ const TAstNodePtr GetRightExpr() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ EReturnType GetReturnType() const override;
+ EBinaryOperation Operation;
+ TAstNodePtr LeftExpr;
+ TAstNodePtr RightExpr;
+// Boolean literal node; stores the literal value.
+class TBooleanLiteralNode : public TAstNode {
+ TBooleanLiteralNode(TPosition pos, bool value);
+ bool GetValue() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ bool Value;
+// Null literal node. Carries no data beyond its source position.
+class TNullLiteralNode : public TAstNode {
+ explicit TNullLiteralNode(TPosition pos);
+ void Accept(IAstNodeVisitor& visitor) const override;
+// String literal node; stores the literal value.
+class TStringLiteralNode : public TAstNode {
+ TStringLiteralNode(TPosition pos, const TString& value);
+ const TString& GetValue() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TString Value;
+// Filter object node. Carries no data beyond its source position.
+class TFilterObjectNode : public TAstNode {
+ explicit TFilterObjectNode(TPosition pos);
+ void Accept(IAstNodeVisitor& visitor) const override;
+// Filter node: a predicate expression paired with the input expression it is applied to.
+class TFilterPredicateNode : public TAstNode {
+ TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input);
+ const TAstNodePtr GetPredicate() const;
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Predicate;
+ TAstNodePtr Input;
+// Item methods of the AST. TJsonPathBuilder::VisitMethodCall maps each one to the
+// corresponding EJsonPathItemType::*Method value.
+enum class EMethodType {
+ Abs = 0,
+ Floor = 1,
+ Ceiling = 2,
+ Double = 3,
+ Type = 4,
+ Size = 5,
+ KeyValue = 6,
+// Method call node: a method type applied to a single input expression.
+class TMethodCallNode : public TAstNode {
+ TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input);
+ EMethodType GetType() const;
+ const TAstNodePtr GetInput() const;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ EMethodType Type;
+ TAstNodePtr Input;
+// "Starts with" predicate node: an input expression and a prefix expression.
+// GetReturnType() is overridden (it returns EReturnType::Bool in ast_nodes.cpp).
+class TStartsWithPredicateNode : public TAstNode {
+ TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix);
+ const TAstNodePtr GetInput() const;
+ const TAstNodePtr GetPrefix() const;
+ EReturnType GetReturnType() const override;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+ TAstNodePtr Prefix;
+// "Exists" predicate node over a single input expression.
+// GetReturnType() is overridden (it returns EReturnType::Bool in ast_nodes.cpp).
+class TExistsPredicateNode : public TAstNode {
+ TExistsPredicateNode(TPosition pos, TAstNodePtr input);
+ const TAstNodePtr GetInput() const;
+ EReturnType GetReturnType() const override;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+// "Is unknown" predicate node over a single input expression.
+// GetReturnType() is overridden (it returns EReturnType::Bool in ast_nodes.cpp).
+class TIsUnknownPredicateNode : public TAstNode {
+ TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input);
+ const TAstNodePtr GetInput() const;
+ EReturnType GetReturnType() const override;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+// "like_regex" predicate node: an input expression plus a regex object owned by the node.
+// The constructor takes the regex by rvalue reference; GetRegex() exposes it by const reference.
+class TLikeRegexPredicateNode : public TAstNode {
+ TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NReWrapper::IRePtr&& regex);
+ const TAstNodePtr GetInput() const;
+ const NReWrapper::IRePtr& GetRegex() const;
+ EReturnType GetReturnType() const override;
+ void Accept(IAstNodeVisitor& visitor) const override;
+ TAstNodePtr Input;
+ NReWrapper::IRePtr Regex;
diff --git a/yql/essentials/minikql/jsonpath/benchmark/main.cpp b/yql/essentials/minikql/jsonpath/benchmark/main.cpp
new file mode 100644
index 0000000000..456a09c399
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/benchmark/main.cpp
@@ -0,0 +1,114 @@
+#include <yql/essentials/minikql/dom/json.h>
+#include <yql/essentials/minikql/jsonpath/jsonpath.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <contrib/ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h>
+#include <contrib/ydb/library/yql/minikql/mkql_mem_info.h>
+#include <contrib/ydb/library/yql/minikql/mkql_function_registry.h>
+#include <contrib/ydb/library/yql/minikql/mkql_alloc.h>
+#include <contrib/ydb/library/yql/minikql/mkql_node.h>
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/testing/benchmark/bench.h>
+#include <util/random/fast.h>
+using namespace NJson;
+using namespace NYql;
+using namespace NYql::NDom;
+using namespace NYql::NUdf;
+using namespace NYql::NJsonPath;
+using namespace NJson;
+using namespace NKikimr::NMiniKQL;
+// Builds a string of lowercase letters whose length is drawn from rand.Uniform(min, max + 1).
+// Characters are drawn from rand.Uniform('a', 'z' + 1); the "+ 1" suggests Uniform's upper
+// bound is exclusive (TODO confirm against TReallyFastRng32).
+// The generator is a function-local static seeded with 0 and shared by all calls.
+TString RandomString(ui32 min, ui32 max) {
+ static TReallyFastRng32 rand(0);
+ TString result;
+ const ui32 length = rand.Uniform(min, max + 1);
+ result.reserve(length);
+ for (ui32 i = 0; i < length; ++i) {
+ result.push_back(char(rand.Uniform('a', 'z' + 1)));
+ }
+ return result;
+// Convenience overload: passes the same value as both bounds to the two-argument overload.
+TString RandomString(ui32 length) {
+ return RandomString(length, length);
+// Builds the JSON document used by the benchmarks and returns it as text.
+// Shape: { "_id": {"id", "issueId"}, "@class", "author", "transitionId",
+//          "changes": {"comment": [30 x {"id", "newText"}]} }, all leaf values random strings.
+TString GenerateRandomJson() {
+ TJsonMap result;
+ TJsonMap id;
+ id.InsertValue("id", TJsonValue(RandomString(24)));
+ id.InsertValue("issueId", TJsonValue(RandomString(24)));
+ result.InsertValue("_id", std::move(id));
+ result.InsertValue("@class", TJsonValue(RandomString(60)));
+ result.InsertValue("author", TJsonValue(RandomString(10)));
+ result.InsertValue("transitionId", TJsonValue(RandomString(24)));
+ TJsonArray comments;
+ for (ui32 i = 0; i < 30; i++) {
+ TJsonMap comment;
+ comment.InsertValue("id", TJsonValue(RandomString(24)));
+ comment.InsertValue("newText", TJsonValue(RandomString(150)));
+ comments.AppendValue(std::move(comment));
+ }
+ TJsonMap changes;
+ changes.InsertValue("comment", std::move(comments));
+ result.InsertValue("changes", std::move(changes));
+ return result.GetStringRobust();
+const size_t MAX_PARSE_ERRORS = 100;
+// Declares, in the enclosing scope, the objects the benchmarks rely on: FunctionRegistry,
+// Alloc, Env, MemInfo, HolderFactory and ValueBuilder. The benchmark bodies pass
+// &ValueBuilder to TryParseJsonDom and ExecuteJsonPath.
+// The last line ends with a continuation backslash, so the line after the macro must stay empty.
+#define PREPARE() \
+ TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())); \
+ TScopedAlloc Alloc(__LOCATION__); \
+ TTypeEnvironment Env(Alloc); \
+ TMemoryUsageInfo MemInfo("Memory"); \
+ THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()); \
+ TDefaultValueBuilder ValueBuilder(HolderFactory); \
+// Benchmark: member access path. The JSON is generated and parsed into a DOM once;
+// each iteration both parses the jsonpath text and executes it, and aborts on an error result.
+// ValueBuilder is the object declared by the PREPARE macro.
+Y_CPU_BENCHMARK(JsonPath, iface) {
+ const TString json = GenerateRandomJson();
+ const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
+ for (size_t i = 0; i < iface.Iterations(); i++) {
+ TIssues issues;
+ const auto jsonPath = ParseJsonPath("$.'_id'.issueId", issues, MAX_PARSE_ERRORS);
+ const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder);
+ Y_ABORT_UNLESS(!result.IsError());
+ }
+// Benchmark: like_regex path with ParseJsonPath inside the loop, so every iteration pays
+// for parsing the path text (and, as the name indicates, compiling the regex) before executing.
+// ValueBuilder is the object declared by the PREPARE macro.
+Y_CPU_BENCHMARK(JsonPathLikeRegexWithCompile, iface) {
+ const TString json = GenerateRandomJson();
+ const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
+ for (size_t i = 0; i < iface.Iterations(); i++) {
+ TIssues issues;
+ const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS);
+ const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder);
+ Y_ABORT_UNLESS(!result.IsError());
+ }
+// Benchmark: like_regex path with ParseJsonPath hoisted out of the loop, so iterations
+// measure execution of an already-parsed path only.
+// ValueBuilder is the object declared by the PREPARE macro.
+Y_CPU_BENCHMARK(JsonPathLikeRegex, iface) {
+ const TString json = GenerateRandomJson();
+ const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder);
+ TIssues issues;
+ const auto jsonPath = ParseJsonPath("$[*] like_regex \"[0-9]+\"", issues, MAX_PARSE_ERRORS);
+ for (size_t i = 0; i < iface.Iterations(); i++) {
+ const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder);
+ Y_ABORT_UNLESS(!result.IsError());
+ }
diff --git a/yql/essentials/minikql/jsonpath/benchmark/ya.make b/yql/essentials/minikql/jsonpath/benchmark/ya.make
new file mode 100644
index 0000000000..b26163510f
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/benchmark/ya.make
@@ -0,0 +1,19 @@
+ library/cpp/json
+ yql/essentials/minikql/dom
+ contrib/ydb/library/yql/minikql/invoke_builtins/llvm14
+ yql/essentials/minikql/jsonpath
+ yql/essentials/public/issue
+ yql/essentials/public/udf/service/exception_policy
+ contrib/ydb/library/yql/sql/pg_dummy
+ main.cpp
diff --git a/yql/essentials/minikql/jsonpath/binary.cpp b/yql/essentials/minikql/jsonpath/binary.cpp
new file mode 100644
index 0000000000..8d75a6d3b9
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/binary.cpp
@@ -0,0 +1,604 @@
+#include "binary.h"
+#include <yql/essentials/utils/yql_panic.h>
+namespace NYql::NJsonPath {
+// A subscript is a range when its ToOffset is non-zero. The builder leaves ToOffset at zero
+// for single-index subscripts (see TJsonPathBuilder::VisitArrayAccess).
+bool TArraySubscriptOffsets::IsRange() const {
+ return ToOffset > 0;
+// Typed accessors over the Data variant. Each one calls std::get with a fixed alternative,
+// so the caller must check Type first; std::get throws std::bad_variant_access if a
+// different alternative is stored.
+
+// String payload (filled by ReadFromPos for Variable, StringLiteral and MemberAccess items).
+const TStringBuf TJsonPathItem::GetString() const {
+ return std::get<TStringBuf>(Data);
+// Subscript offsets (ArrayAccess items).
+const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const {
+ return std::get<TVector<TArraySubscriptOffsets>>(Data);
+// Left/right operand offsets (Binary* items).
+const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const {
+ return std::get<TBinaryOpArgumentsOffset>(Data);
+// Numeric payload (NumberLiteral items).
+double TJsonPathItem::GetNumber() const {
+ return std::get<double>(Data);
+// Boolean payload (BooleanLiteral items).
+bool TJsonPathItem::GetBoolean() const {
+ return std::get<bool>(Data);
+// Offset of the predicate item (FilterPredicate items).
+TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const {
+ return std::get<TFilterPredicateOffset>(Data);
+// Offset of the prefix item (StartsWithPredicate items).
+TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const {
+ return std::get<TStartsWithPrefixOffset>(Data);
+// Deserialized regex (LikeRegexPredicate items).
+const NReWrapper::IRePtr& TJsonPathItem::GetRegex() const {
+ return std::get<NReWrapper::IRePtr>(Data);
+// Creates a reader over a serialized jsonpath and reads the execution mode from its start.
+// ReadMode takes InitialPos by non-const reference; presumably it advances InitialPos past
+// the mode field so that ReadFirst() starts at the first item (TODO confirm in ReadPOD).
+// NOTE(review): this relies on InitialPos being declared before Mode in the class.
+TJsonPathReader::TJsonPathReader(const TJsonPathPtr path)
+ : Path(path)
+ , InitialPos(0)
+ , Mode(ReadMode(InitialPos))
+// Navigation helpers. Each resolves an offset stored in an item and returns the item at
+// that offset through ReadFromPos.
+
+// Item located at InitialPos.
+const TJsonPathItem& TJsonPathReader::ReadFirst() {
+ return ReadFromPos(InitialPos);
+// Input of the given item; fails the YQL_ENSURE check if the item has no input offset.
+const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) {
+ YQL_ENSURE(item.InputItemOffset.Defined());
+ return ReadFromPos(*item.InputItemOffset);
+// Item at the subscript's FromOffset.
+const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) {
+ return ReadFromPos(subscript.FromOffset);
+// Item at the subscript's ToOffset; only valid for range subscripts.
+const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) {
+ YQL_ENSURE(subscript.IsRange());
+ return ReadFromPos(subscript.ToOffset);
+// Left operand of a binary operation item.
+const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetBinaryOpArguments().LeftOffset);
+// Right operand of a binary operation item.
+const TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetBinaryOpArguments().RightOffset);
+// Predicate of a filter item.
+const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetFilterPredicateOffset().Offset);
+// Prefix of a starts-with predicate item.
+const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) {
+ return ReadFromPos(node.GetStartsWithPrefixOffset().Offset);
+// Execution mode read by the constructor.
+EJsonPathMode TJsonPathReader::GetMode() const {
+ return Mode;
+// Decodes the item stored at byte offset `pos` and returns a reference to the cached result.
+// Decoded items are memoized in ItemCache keyed by their starting offset, so each offset is
+// decoded at most once per reader.
+// Every item starts with <type> <row> <column>; the remaining fields depend on the type and
+// mirror what the TJsonPathBuilder::Visit* / Write*Item functions emit.
+// `pos` is taken by value and advanced locally by the Read* helpers.
+const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) {
+ YQL_ENSURE(pos < Path->Size());
+ const auto it = ItemCache.find(pos);
+ if (it != ItemCache.end()) {
+ return it->second;
+ }
+ // Insert the cache entry first (keyed by the item's start offset), then fill it in place.
+ TJsonPathItem& result = ItemCache[pos];
+ result.Type = ReadType(pos);
+ // The builder writes row first and column second (WritePos); TPosition is constructed
+ // here with column as its first argument.
+ const auto row = ReadUint(pos);
+ const auto column = ReadUint(pos);
+ result.Pos = TPosition(column, row, "jsonpath");
+ switch (result.Type) {
+ // Items without input
+ case EJsonPathItemType::FilterObject:
+ case EJsonPathItemType::NullLiteral:
+ case EJsonPathItemType::ContextObject:
+ case EJsonPathItemType::LastArrayIndex:
+ break;
+ case EJsonPathItemType::Variable:
+ case EJsonPathItemType::StringLiteral:
+ // The stored TStringBuf points into the Path buffer (see ReadString).
+ result.Data = ReadString(pos);
+ break;
+ case EJsonPathItemType::NumberLiteral:
+ result.Data = ReadDouble(pos);
+ break;
+ case EJsonPathItemType::BooleanLiteral:
+ result.Data = ReadBool(pos);
+ break;
+ // Items with single input
+ case EJsonPathItemType::TypeMethod:
+ case EJsonPathItemType::SizeMethod:
+ case EJsonPathItemType::KeyValueMethod:
+ case EJsonPathItemType::AbsMethod:
+ case EJsonPathItemType::FloorMethod:
+ case EJsonPathItemType::CeilingMethod:
+ case EJsonPathItemType::DoubleMethod:
+ case EJsonPathItemType::WildcardArrayAccess:
+ case EJsonPathItemType::WildcardMemberAccess:
+ case EJsonPathItemType::UnaryMinus:
+ case EJsonPathItemType::UnaryPlus:
+ case EJsonPathItemType::UnaryNot:
+ case EJsonPathItemType::IsUnknownPredicate:
+ case EJsonPathItemType::ExistsPredicate:
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ // Items with a payload followed by the input offset
+ case EJsonPathItemType::MemberAccess:
+ result.Data = ReadString(pos);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ case EJsonPathItemType::ArrayAccess:
+ result.Data = ReadSubscripts(pos);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ // Written by WriteTwoInputsItem as <predicate offset> <input offset>
+ case EJsonPathItemType::FilterPredicate:
+ result.Data = TFilterPredicateOffset{ReadUint(pos)};
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ // Written by WriteTwoInputsItem as <prefix offset> <input offset>
+ case EJsonPathItemType::StartsWithPredicate:
+ result.Data = TStartsWithPrefixOffset{ReadUint(pos)};
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ case EJsonPathItemType::LikeRegexPredicate: {
+ // The regex is stored serialized as a length-prefixed string and is deserialized
+ // here on first access; the cache keeps the deserialized object afterwards.
+ const auto serializedRegex = ReadString(pos);
+ auto regex = NReWrapper::NDispatcher::Deserialize(serializedRegex);
+ result.Data = std::move(regex);
+ result.InputItemOffset = ReadUint(pos);
+ break;
+ }
+ // Items with 2 inputs
+ case EJsonPathItemType::BinaryAdd:
+ case EJsonPathItemType::BinarySubstract:
+ case EJsonPathItemType::BinaryMultiply:
+ case EJsonPathItemType::BinaryDivide:
+ case EJsonPathItemType::BinaryModulo:
+ case EJsonPathItemType::BinaryLess:
+ case EJsonPathItemType::BinaryLessEqual:
+ case EJsonPathItemType::BinaryGreater:
+ case EJsonPathItemType::BinaryGreaterEqual:
+ case EJsonPathItemType::BinaryEqual:
+ case EJsonPathItemType::BinaryNotEqual:
+ case EJsonPathItemType::BinaryAnd:
+ case EJsonPathItemType::BinaryOr:
+ TBinaryOpArgumentsOffset data;
+ data.LeftOffset = ReadUint(pos);
+ data.RightOffset = ReadUint(pos);
+ result.Data = data;
+ break;
+ }
+ return result;
+// Primitive readers. All take the cursor by reference and delegate to ReadPOD, directly or
+// through ReadUint (ReadPOD is defined outside this file; presumably it advances the cursor
+// by the size of the value read).
+
+// Reads one TUint.
+TUint TJsonPathReader::ReadUint(TUint& pos) {
+ return ReadPOD<TUint>(pos);
+// Reads one double.
+double TJsonPathReader::ReadDouble(TUint& pos) {
+ return ReadPOD<double>(pos);
+// Reads one bool.
+bool TJsonPathReader::ReadBool(TUint& pos) {
+ return ReadPOD<bool>(pos);
+// Reads one TUint and casts it to EJsonPathItemType; the value is not range-checked here.
+EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) {
+ return static_cast<EJsonPathItemType>(ReadUint(pos));
+// Reads one TUint and casts it to EJsonPathMode; the value is not range-checked here.
+EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) {
+ return static_cast<EJsonPathMode>(ReadUint(pos));
+// Reads a length-prefixed string (<TUint length> <char[length]>) starting at `pos` and
+// advances `pos` past it.
+// The returned TStringBuf points into the Path buffer; it does not own the bytes, so it is
+// only valid while the buffer held by Path is alive and unmodified.
+const TStringBuf TJsonPathReader::ReadString(TUint& pos) {
+ TUint length = ReadUint(pos);
+ // Reject a length prefix that would run past the end of the buffer instead of building
+ // a view over out-of-bounds memory. Written as a subtraction so the check cannot overflow.
+ YQL_ENSURE(pos <= Path->Size() && length <= Path->Size() - pos);
+ TStringBuf result(Path->Begin() + pos, length);
+ pos += length;
+ return result;
+// Reads the subscript table of an ArrayAccess item: <TUint count> followed by `count`
+// pairs <FromOffset> <ToOffset>. Advances `pos` past the table.
+TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) {
+ const auto count = ReadUint(pos);
+ TVector<TArraySubscriptOffsets> result(count);
+ for (size_t i = 0; i < count; i++) {
+ result[i].FromOffset = ReadUint(pos);
+ result[i].ToOffset = ReadUint(pos);
+ }
+ return result;
+// Serializes the root: writes the execution mode, then serializes the top-level expression
+// right after it.
+void TJsonPathBuilder::VisitRoot(const TRootNode& node) {
+ // Block structure:
+ // <(1) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathMode. Jsonpath execution mode
+ WriteMode(node.GetMode());
+ node.GetExpr()->Accept(*this);
+// Leaf items: the common <type> <row> <column> header, optionally followed by a payload.
+
+// Header only.
+void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::ContextObject, node);
+// Header followed by the length-prefixed variable name.
+void TJsonPathBuilder::VisitVariable(const TVariableNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::Variable, node);
+ WriteString(node.GetName());
+// Header only.
+void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node);
+// Header followed by the value as a double.
+void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node);
+ WriteDouble(node.GetValue());
+// Serializes a member access item, then its input item immediately after it.
+void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Member access item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Length of member name string
+ // (5) Member name string
+ // (6) Offset of the input item
+ WriteType(EJsonPathItemType::MemberAccess);
+ WritePos(node);
+ WriteString(node.GetMember());
+ // (6) The input is written right after the offset field, so the offset is "next position"
+ WriteNextPosition();
+ node.GetInput()->Accept(*this);
+// Serialized as a single-input item (header, input offset, input).
+void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput());
+// Serializes an array access item. The subscript items have unknown sizes up front, so the
+// offset table and the input offset are first written as zeros and patched once the
+// subscripts have been emitted.
+void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Array access item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Count of subscripts stored
+ // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element
+ // is set to its offset and second is zero. If subscript is a range, both pair elements are valid offsets
+ // to the elements of range (lower and upper bound).
+ // (6) Offset of the input item
+ // (7) Subscript items. Each one is produced by visiting the subscript's From node and,
+ // for ranges, its To node
+ WriteType(EJsonPathItemType::ArrayAccess);
+ WritePos(node);
+ // (4) Write count of subscripts stored
+ const auto& subscripts = node.GetSubscripts();
+ const auto count = subscripts.size();
+ WriteUint(count);
+ // (5) We do not know sizes of each subscript. Write array of zeros for offsets
+ const auto indexStart = CurrentEndPos();
+ TVector<TUint> offsets(2 * count);
+ WriteUintSequence(offsets);
+ // (6) Reserve space for input offset to rewrite it later
+ const auto inputStart = CurrentEndPos();
+ WriteFinishPosition();
+ // (7) Write all subscripts and record offset for each of them
+ for (size_t i = 0; i < count; i++) {
+ offsets[2 * i] = CurrentEndPos();
+ subscripts[i].From->Accept(*this);
+ // A null To means a single index; its slot in the table stays zero
+ if (subscripts[i].To) {
+ offsets[2 * i + 1] = CurrentEndPos();
+ subscripts[i].To->Accept(*this);
+ }
+ }
+ // (5) Rewrite offsets with correct values
+ RewriteUintSequence(offsets, indexStart);
+ // (6) Rewrite input offset
+ RewriteUint(CurrentEndPos(), inputStart);
+ node.GetInput()->Accept(*this);
+// Serialized as a single-input item (header, input offset, input).
+void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput());
+// Maps the AST unary operator to its binary item type and writes a single-input item.
+// NOTE(review): `type` has no initializer and the switch has no default, so an operator
+// value outside the three enumerators would leave it uninitialized.
+void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetOp()) {
+ case EUnaryOperation::Plus:
+ type = EJsonPathItemType::UnaryPlus;
+ break;
+ case EUnaryOperation::Minus:
+ type = EJsonPathItemType::UnaryMinus;
+ break;
+ case EUnaryOperation::Not:
+ type = EJsonPathItemType::UnaryNot;
+ break;
+ }
+ WriteSingleInputItem(type, node, node.GetExpr());
+// Maps the AST binary operator to its binary item type and writes a two-input item with
+// the left operand first and the right operand second.
+// NOTE(review): `type` has no initializer and the switch has no default, so an operator
+// value outside the listed enumerators would leave it uninitialized.
+void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetOp()) {
+ case EBinaryOperation::Add:
+ type = EJsonPathItemType::BinaryAdd;
+ break;
+ case EBinaryOperation::Substract:
+ type = EJsonPathItemType::BinarySubstract;
+ break;
+ case EBinaryOperation::Multiply:
+ type = EJsonPathItemType::BinaryMultiply;
+ break;
+ case EBinaryOperation::Divide:
+ type = EJsonPathItemType::BinaryDivide;
+ break;
+ case EBinaryOperation::Modulo:
+ type = EJsonPathItemType::BinaryModulo;
+ break;
+ case EBinaryOperation::Less:
+ type = EJsonPathItemType::BinaryLess;
+ break;
+ case EBinaryOperation::LessEqual:
+ type = EJsonPathItemType::BinaryLessEqual;
+ break;
+ case EBinaryOperation::Greater:
+ type = EJsonPathItemType::BinaryGreater;
+ break;
+ case EBinaryOperation::GreaterEqual:
+ type = EJsonPathItemType::BinaryGreaterEqual;
+ break;
+ case EBinaryOperation::Equal:
+ type = EJsonPathItemType::BinaryEqual;
+ break;
+ case EBinaryOperation::NotEqual:
+ type = EJsonPathItemType::BinaryNotEqual;
+ break;
+ case EBinaryOperation::And:
+ type = EJsonPathItemType::BinaryAnd;
+ break;
+ case EBinaryOperation::Or:
+ type = EJsonPathItemType::BinaryOr;
+ break;
+ }
+ WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr());
+// Leaf items: the common <type> <row> <column> header, optionally followed by a payload.
+
+// Header followed by the value as a bool.
+void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node);
+ WriteBool(node.GetValue());
+// Header only.
+void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::NullLiteral, node);
+// Header followed by the length-prefixed string value.
+void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::StringLiteral, node);
+ WriteString(node.GetValue());
+// Header only.
+void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) {
+ WriteZeroInputItem(EJsonPathItemType::FilterObject, node);
+// Two-input item with the predicate first and the input second. The reader relies on this
+// order: it reads the predicate offset and then the input offset.
+void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) {
+ WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput());
+// Maps the AST method type to its binary item type and writes a single-input item.
+// NOTE(review): `type` has no initializer and the switch has no default, so a method value
+// outside the listed enumerators would leave it uninitialized.
+void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) {
+ EJsonPathItemType type;
+ switch (node.GetType()) {
+ case EMethodType::Abs:
+ type = EJsonPathItemType::AbsMethod;
+ break;
+ case EMethodType::Floor:
+ type = EJsonPathItemType::FloorMethod;
+ break;
+ case EMethodType::Ceiling:
+ type = EJsonPathItemType::CeilingMethod;
+ break;
+ case EMethodType::Double:
+ type = EJsonPathItemType::DoubleMethod;
+ break;
+ case EMethodType::Type:
+ type = EJsonPathItemType::TypeMethod;
+ break;
+ case EMethodType::Size:
+ type = EJsonPathItemType::SizeMethod;
+ break;
+ case EMethodType::KeyValue:
+ type = EJsonPathItemType::KeyValueMethod;
+ break;
+ }
+ WriteSingleInputItem(type, node, node.GetInput());
+// Calls ShrinkToFit() on the result buffer and returns the pointer to it.
+TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() {
+ Result->ShrinkToFit();
+ return Result;
+// Two-input item with the prefix first and the input second. The reader relies on this
+// order: it reads the prefix offset and then the input offset.
+void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) {
+ WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput());
+// Serialized as a single-input item (header, input offset, input).
+void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput());
+// Serialized as a single-input item (header, input offset, input).
+void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) {
+ WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput());
+// Serializes a like_regex item: the regex is stored in serialized form as a length-prefixed
+// string, followed by the input offset and the input item.
+void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
+ // Components:
+ // (1) Must be casted to EJsonPathItemType. Like regex predicate item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Length of the serialized regex (result of NReWrapper IRe::Serialize)
+ // (5) Serialized regex
+ // (6) Offset of the input item
+ WriteType(EJsonPathItemType::LikeRegexPredicate);
+ WritePos(node);
+ const TString serializedRegex = node.GetRegex()->Serialize();
+ WriteString(serializedRegex);
+ // (6) The input is written right after the offset field, so the offset is "next position"
+ WriteNextPosition();
+ node.GetInput()->Accept(*this);
+// Writes the header shared by every item: the item type and the node's source position.
+void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ WriteType(type);
+ WritePos(node);
+// Writes an item with exactly one input: the header, the offset of the input, and then the
+// input item itself directly after the offset field.
+void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Offset of the input item
+ // (5) Input item
+ WriteZeroInputItem(type, node);
+ WriteNextPosition();
+ input->Accept(*this);
+// Writes an item with two inputs. The size of the first input is not known up front, so
+// both offsets are written as zeros and patched after the inputs have been emitted.
+void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) {
+ // Block structure:
+ // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item>
+ // Components:
+ // (1) Item type
+ // (2) Row of the position in the source jsonpath
+ // (3) Column of the position in the source jsonpath
+ // (4) Offset of the first input
+ // (5) Offset of the second input
+ // (6) JsonPath item representing first input
+ // (7) JsonPath item representing second input
+ WriteZeroInputItem(type, node);
+ // (4) and (5) Fill offsets with zeros
+ const auto indexStart = CurrentEndPos();
+ WriteUint(0);
+ WriteUint(0);
+ // (6) Write first input and record its offset
+ const auto firstInputStart = CurrentEndPos();
+ firstInput->Accept(*this);
+ // (7) Write second input and record its offset
+ const auto secondInputStart = CurrentEndPos();
+ secondInput->Accept(*this);
+ // (4) and (5) Rewrite offsets with correct values
+ RewriteUintSequence({firstInputStart, secondInputStart}, indexStart);
+// Writes the node position as two TUint values: row first, column second.
+void TJsonPathBuilder::WritePos(const TAstNode& node) {
+ WriteUint(node.GetPos().Row);
+ WriteUint(node.GetPos().Column);
+// Writes the item type as a TUint.
+void TJsonPathBuilder::WriteType(EJsonPathItemType type) {
+ WriteUint(static_cast<TUint>(type));
+// Writes the execution mode as a TUint.
+void TJsonPathBuilder::WriteMode(EJsonPathMode mode) {
+ WriteUint(static_cast<TUint>(mode));
+// Writes the offset of the byte that follows this offset field itself, i.e. the place where
+// the next item is about to be written.
+void TJsonPathBuilder::WriteNextPosition() {
+ WriteUint(CurrentEndPos() + sizeof(TUint));
+// Writes a zero TUint. VisitArrayAccess uses it as a placeholder for an offset that is
+// patched later with RewriteUint.
+void TJsonPathBuilder::WriteFinishPosition() {
+ WriteUint(0);
+// Writes a length-prefixed string: the size as a TUint followed by the raw bytes, with no
+// terminator. TJsonPathReader::ReadString reads the same layout.
+void TJsonPathBuilder::WriteString(TStringBuf value) {
+ WriteUint(value.size());
+ Result->Append(value.data(), value.size());
+// Overwrites already-written bytes starting at `offset` with the raw contents of `sequence`.
+// Used to patch offset tables that were first written as zeros.
+void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) {
+ const auto length = sequence.size() * sizeof(TUint);
+ // The patched range must lie inside the written part of the buffer. Ending exactly at
+ // the current end is still in bounds, hence "<=" rather than "<".
+ Y_ASSERT(offset + length <= CurrentEndPos());
+ MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length);
+// Appends the raw bytes of all elements of `sequence` to the result buffer.
+void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) {
+ const auto length = sequence.size() * sizeof(TUint);
+ Result->Append(reinterpret_cast<const char*>(sequence.data()), length);
+// Overwrites one already-written TUint at `offset` with `value`.
+// Used to patch an offset field that was first written as a placeholder.
+void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) {
+ // The patched field must lie inside the written part of the buffer. A field that is the
+ // last thing written ends exactly at the current end (e.g. the input offset of an array
+ // access with no subscripts), hence "<=" rather than "<".
+ Y_ASSERT(offset + sizeof(TUint) <= CurrentEndPos());
+ MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint));
+// Primitive writers; each delegates to WritePOD (defined outside this file).
+
+// Writes one TUint.
+void TJsonPathBuilder::WriteUint(TUint value) {
+ WritePOD(value);
+// Writes one double.
+void TJsonPathBuilder::WriteDouble(double value) {
+ WritePOD(value);
+// Writes one bool.
+void TJsonPathBuilder::WriteBool(bool value) {
+ WritePOD(value);
+// Current size of the result buffer, i.e. the offset at which the next write lands.
+TUint TJsonPathBuilder::CurrentEndPos() const {
+ return Result->Size();
diff --git a/yql/essentials/minikql/jsonpath/binary.h b/yql/essentials/minikql/jsonpath/binary.h
new file mode 100644
index 0000000000..7ce2626152
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/binary.h
@@ -0,0 +1,275 @@
+#pragma once
+#include "ast_nodes.h"
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <util/system/unaligned_mem.h>
+#include <util/generic/buffer.h>
+#include <util/generic/ptr.h>
+#include <util/generic/maybe.h>
+#include <util/generic/hash.h>
+#include <variant>
+#include <type_traits>
+namespace NYql::NJsonPath {
+// Binary representation of a jsonpath: a reference-counted byte buffer.
+class TJsonPath : public TSimpleRefCount<TJsonPath>, public TBuffer {
+// Intrusive reference-counting pointer to a binary jsonpath.
+using TJsonPathPtr = TIntrusivePtr<TJsonPath>;
+// Unsigned integer type used for everything the builder writes through WriteUint:
+// offsets, lengths, positions and type/mode tags.
+using TUint = ui64;
+// Kind of a jsonpath item. The numeric value of each enumerator is what the builder
+// writes into the binary buffer (see TJsonPathBuilder::WriteType), so renumbering
+// the enumerators changes the binary encoding.
+enum class EJsonPathItemType {
+ MemberAccess = 0,
+ WildcardMemberAccess = 1,
+ ArrayAccess = 2,
+ WildcardArrayAccess = 3,
+ ContextObject = 4,
+ NumberLiteral = 5,
+ LastArrayIndex = 6,
+ UnaryPlus = 7,
+ UnaryMinus = 8,
+ BinaryAdd = 9,
+ BinarySubstract = 10,
+ BinaryMultiply = 11,
+ BinaryDivide = 12,
+ BinaryModulo = 13,
+ Variable = 14,
+ BinaryLess = 15,
+ BinaryLessEqual = 16,
+ BinaryGreater = 17,
+ BinaryGreaterEqual = 18,
+ BinaryEqual = 19,
+ BinaryNotEqual = 20,
+ BinaryAnd = 21,
+ BinaryOr = 22,
+ UnaryNot = 23,
+ BooleanLiteral = 24,
+ NullLiteral = 25,
+ StringLiteral = 26,
+ FilterObject = 27,
+ FilterPredicate = 28,
+ AbsMethod = 29,
+ FloorMethod = 30,
+ CeilingMethod = 31,
+ DoubleMethod = 32,
+ TypeMethod = 33,
+ SizeMethod = 34,
+ KeyValueMethod = 35,
+ StartsWithPredicate = 36,
+ ExistsPredicate = 37,
+ IsUnknownPredicate = 38,
+ LikeRegexPredicate = 39,
+// Buffer offsets of the two expressions that make up one array subscript.
+// TJsonPathReader::ReadFromSubscript / ReadToSubscript take this struct to read them.
+struct TArraySubscriptOffsets {
+ TUint FromOffset = 0;
+ TUint ToOffset = 0;
+ // Tells whether the subscript is a range (has an upper bound) rather than a single index.
+ bool IsRange() const;
+// Buffer offsets of the left and right operands of a binary operation item.
+struct TBinaryOpArgumentsOffset {
+ TUint LeftOffset = 0;
+ TUint RightOffset = 0;
+// Buffer offset of the predicate item belonging to a filter item.
+struct TFilterPredicateOffset {
+ TUint Offset = 0;
+// Buffer offset of the prefix item belonging to a "starts with" predicate item.
+struct TStartsWithPrefixOffset {
+ TUint Offset = 0;
+// One decoded jsonpath item together with the payload that belongs to its type.
+struct TJsonPathItem {
+ // Position in the source jsonpath
+ TPosition Pos;
+ // Type of item
+ EJsonPathItemType Type;
+ // Offset in buffer pointing to the input item
+ TMaybe<TUint> InputItemOffset;
+ // Data associated with this item. To determine which variant
+ // alternative was filled, the caller must examine the Type field.
+ // WARNING: Some item types do not fill the Data field at all! You must
+ // check the item type before accessing this field.
+ std::variant<
+ TStringBuf,
+ TVector<TArraySubscriptOffsets>,
+ TBinaryOpArgumentsOffset,
+ TFilterPredicateOffset,
+ TStartsWithPrefixOffset,
+ NReWrapper::IRePtr,
+ double,
+ bool
+ > Data;
+ // Typed accessors for the Data field, one per payload kind.
+ const TStringBuf GetString() const;
+ const TVector<TArraySubscriptOffsets>& GetSubscripts() const;
+ const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const;
+ const NReWrapper::IRePtr& GetRegex() const;
+ double GetNumber() const;
+ bool GetBoolean() const;
+ TFilterPredicateOffset GetFilterPredicateOffset() const;
+ TStartsWithPrefixOffset GetStartsWithPrefixOffset() const;
+ // Pointer to the binary representation of jsonpath.
+ // We do not use this directly, but the Data field can reference it.
+ // For example, if this item is a string then Data contains a TStringBuf
+ // pointing to some part inside the buffer. We must ensure that the buffer is not
+ // destructed while this item is alive, so we keep a shared pointer to it.
+ const TJsonPathPtr JsonPath;
+// AST visitor that serializes a jsonpath AST into the binary TJsonPath buffer.
+// Each Visit* override handles one AST node kind; the Write*/Rewrite* helpers
+// append to, or patch, the buffer held in Result.
+class TJsonPathBuilder : public IAstNodeVisitor {
+ // Starts with a freshly allocated, empty TJsonPath.
+ TJsonPathBuilder()
+ : Result(new TJsonPath())
+ {
+ }
+ void VisitRoot(const TRootNode& node) override;
+ void VisitContextObject(const TContextObjectNode& node) override;
+ void VisitVariable(const TVariableNode& node) override;
+ void VisitLastArrayIndex(const TLastArrayIndexNode& node) override;
+ void VisitNumberLiteral(const TNumberLiteralNode& node) override;
+ void VisitMemberAccess(const TMemberAccessNode& node) override;
+ void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override;
+ void VisitArrayAccess(const TArrayAccessNode& node) override;
+ void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override;
+ void VisitUnaryOperation(const TUnaryOperationNode& node) override;
+ void VisitBinaryOperation(const TBinaryOperationNode& node) override;
+ void VisitBooleanLiteral(const TBooleanLiteralNode& node) override;
+ void VisitNullLiteral(const TNullLiteralNode& node) override;
+ void VisitStringLiteral(const TStringLiteralNode& node) override;
+ void VisitFilterObject(const TFilterObjectNode& node) override;
+ void VisitFilterPredicate(const TFilterPredicateNode& node) override;
+ void VisitMethodCall(const TMethodCallNode& node) override;
+ void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override;
+ void VisitExistsPredicate(const TExistsPredicateNode& node) override;
+ void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override;
+ void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override;
+ // Hands out the built binary jsonpath.
+ TJsonPathPtr ShrinkAndGetResult();
+ // Writers for items with zero, one or two input items.
+ void WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node);
+ void WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input);
+ void WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput);
+ // Writers for individual fields of an item.
+ void WritePos(const TAstNode& node);
+ void WriteType(EJsonPathItemType type);
+ void WriteMode(EJsonPathMode mode);
+ void WriteNextPosition();
+ void WriteFinishPosition();
+ void WriteString(TStringBuf value);
+ // Rewrite* helpers patch bytes that were already written; Write* helpers append.
+ void RewriteUintSequence(const TVector<TUint>& sequence, TUint offset);
+ void WriteUintSequence(const TVector<TUint>& sequence);
+ void RewriteUint(TUint value, TUint offset);
+ void WriteUint(TUint value);
+ void WriteDouble(double value);
+ void WriteBool(bool value);
+ // Appends the object representation of `value` (sizeof(T) raw bytes) to the buffer.
+ template <typename T>
+ void WritePOD(const T& value) {
+ static_assert(std::is_pod_v<T>, "Type must be POD");
+ Result->Append(reinterpret_cast<const char*>(&value), sizeof(T));
+ }
+ // Offset one past the last byte written so far.
+ TUint CurrentEndPos() const;
+ // Buffer being built.
+ TJsonPathPtr Result;
+// Decodes items from a binary jsonpath on demand. The Read* navigation methods take an
+// already decoded item (or subscript) and return the related item it points to.
+class TJsonPathReader {
+ TJsonPathReader(const TJsonPathPtr path);
+ // Returns the item evaluation starts from.
+ const TJsonPathItem& ReadFirst();
+ const TJsonPathItem& ReadInput(const TJsonPathItem& node);
+ const TJsonPathItem& ReadFromSubscript(const TArraySubscriptOffsets& subscript);
+ const TJsonPathItem& ReadToSubscript(const TArraySubscriptOffsets& subscript);
+ const TJsonPathItem& ReadLeftOperand(const TJsonPathItem& node);
+ const TJsonPathItem& ReadRightOperand(const TJsonPathItem& node);
+ const TJsonPathItem& ReadFilterPredicate(const TJsonPathItem& node);
+ const TJsonPathItem& ReadPrefix(const TJsonPathItem& node);
+ // Mode of the jsonpath being read.
+ EJsonPathMode GetMode() const;
+ // Decodes the item stored at buffer offset `pos`.
+ const TJsonPathItem& ReadFromPos(TUint pos);
+ // Primitive readers: each decodes one value at `pos` and advances `pos` past it.
+ TUint ReadUint(TUint& pos);
+ double ReadDouble(TUint& pos);
+ bool ReadBool(TUint& pos);
+ EJsonPathItemType ReadType(TUint& pos);
+ EJsonPathMode ReadMode(TUint& pos);
+ const TStringBuf ReadString(TUint& pos);
+ TVector<TArraySubscriptOffsets> ReadSubscripts(TUint& pos);
+ // Reads sizeof(T) bytes at offset `pos` as a T (the read does not require alignment)
+ // and advances `pos` by sizeof(T).
+ template <typename T>
+ T ReadPOD(TUint& pos) {
+ static_assert(std::is_pod_v<T>, "Type must be POD");
+ T value = ReadUnaligned<T>(Path->Begin() + pos);
+ pos += sizeof(T);
+ // Return the local by name: wrapping it in std::move would only disable copy elision.
+ return value;
+ }
+ // Binary jsonpath being read; the pointer keeps the buffer alive.
+ const TJsonPathPtr Path;
+ TUint InitialPos;
+ EJsonPathMode Mode;
+ // Decoded items keyed by a TUint (presumably their buffer offset; confirm in the .cpp).
+ THashMap<TUint, TJsonPathItem> ItemCache;
diff --git a/yql/essentials/minikql/jsonpath/executor.cpp b/yql/essentials/minikql/jsonpath/executor.cpp
new file mode 100644
index 0000000000..db2ea213f3
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/executor.cpp
@@ -0,0 +1,1064 @@
+#include "executor.h"
+#include "parse_double.h"
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/minikql/dom/node.h>
+#include <util/generic/scope.h>
+#include <util/generic/maybe.h>
+#include <util/system/compiler.h>
+#include <cmath>
+namespace NYql::NJsonPath {
+using namespace NJson;
+using namespace NUdf;
+using namespace NDom;
+namespace {
+// True when `value` is a container (array or object) rather than a scalar.
+bool IsObjectOrArray(const TValue& value) {
+ return value.IsArray() || value.IsObject();
+// Builds an issue with the given source position and message, tagged with `code`
+// at error severity.
+TIssue MakeError(TPosition pos, TIssueCode code, const TStringBuf message) {
+ TIssue error(pos, message);
+ error.SetCode(code, TSeverityIds::S_ERROR);
+ return error;
+// Convenience overload: reports the error at the position of `item`.
+TIssue MakeError(const TJsonPathItem& item, TIssueCode code, const TStringBuf message) {
+ return MakeError(item.Pos, code, message);
+// Successful result; takes over the contents of `nodes`.
+TResult::TResult(TJsonNodes&& nodes)
+ : Result(std::move(nodes))
+// Successful result; copies `nodes`.
+TResult::TResult(const TJsonNodes& nodes)
+ : Result(nodes)
+// Failed result described by `issue`.
+TResult::TResult(TIssue&& issue)
+ : Result(std::move(issue))
+// Returns the nodes of a successful result. std::get throws if this result
+// holds an error instead, so check IsError() first.
+const TJsonNodes& TResult::GetNodes() const {
+ return std::get<TJsonNodes>(Result);
+// Mutable counterpart of the accessor above, with the same precondition.
+TJsonNodes& TResult::GetNodes() {
+ return std::get<TJsonNodes>(Result);
+// Returns the issue of a failed result. std::get throws if this result
+// holds nodes instead, so check IsError() first.
+const TIssue& TResult::GetError() const {
+ return std::get<TIssue>(Result);
+// True when this result holds an issue rather than nodes.
+bool TResult::IsError() const {
+ return std::holds_alternative<TIssue>(Result);
+ const TJsonPathPtr path,
+ const TJsonNodes& input,
+ const TVariablesMap& variables,
+ const IValueBuilder* valueBuilder)
+ : Reader(path)
+ , Input(input)
+ , Variables(variables)
+ , ValueBuilder(valueBuilder)
+// True when `value` lies within [-EPSILON, EPSILON].
+bool TExecutor::IsZero(double value) {
+ return -EPSILON <= value && value <= EPSILON;
+// True when `a` is smaller than `b` by more than EPSILON.
+bool TExecutor::IsLess(double a, double b) {
+ return (b - a) > EPSILON;
+// True when `a` is larger than `b` by more than EPSILON.
+bool TExecutor::IsGreater(double a, double b) {
+ return (a - b) > EPSILON;
+// True when `a` and `b` differ by at most EPSILON.
+bool TExecutor::IsEqual(double a, double b) {
+ return IsZero(a - b);
+// True when the jsonpath being executed was written in strict mode.
+bool TExecutor::IsStrict() const {
+ return Reader.GetMode() == EJsonPathMode::Strict;
+// True when the jsonpath being executed was written in lax mode.
+bool TExecutor::IsLax() const {
+ return Reader.GetMode() == EJsonPathMode::Lax;
+// Executes the whole jsonpath, starting from the first item the reader yields.
+TResult TExecutor::Execute() {
+ return Execute(Reader.ReadFirst());
+// Evaluates a single item by dispatching on its type to the matching handler.
+// Item types that share semantics (arithmetic, logical, comparison, numeric methods)
+// are routed to one handler, which inspects item.Type again where needed.
+TResult TExecutor::Execute(const TJsonPathItem& item) {
+ switch (item.Type) {
+ case EJsonPathItemType::MemberAccess:
+ return MemberAccess(item);
+ case EJsonPathItemType::WildcardMemberAccess:
+ return WildcardMemberAccess(item);
+ case EJsonPathItemType::ContextObject:
+ return ContextObject();
+ case EJsonPathItemType::Variable:
+ return Variable(item);
+ case EJsonPathItemType::NumberLiteral:
+ return NumberLiteral(item);
+ case EJsonPathItemType::ArrayAccess:
+ return ArrayAccess(item);
+ case EJsonPathItemType::WildcardArrayAccess:
+ return WildcardArrayAccess(item);
+ case EJsonPathItemType::LastArrayIndex:
+ return LastArrayIndex(item);
+ case EJsonPathItemType::UnaryMinus:
+ case EJsonPathItemType::UnaryPlus:
+ return UnaryArithmeticOp(item);
+ case EJsonPathItemType::BinaryAdd:
+ case EJsonPathItemType::BinarySubstract:
+ case EJsonPathItemType::BinaryMultiply:
+ case EJsonPathItemType::BinaryDivide:
+ case EJsonPathItemType::BinaryModulo:
+ return BinaryArithmeticOp(item);
+ case EJsonPathItemType::BinaryAnd:
+ case EJsonPathItemType::BinaryOr:
+ return BinaryLogicalOp(item);
+ case EJsonPathItemType::UnaryNot:
+ return UnaryLogicalOp(item);
+ case EJsonPathItemType::BooleanLiteral:
+ return BooleanLiteral(item);
+ case EJsonPathItemType::NullLiteral:
+ return NullLiteral();
+ case EJsonPathItemType::StringLiteral:
+ return StringLiteral(item);
+ case EJsonPathItemType::FilterObject:
+ return FilterObject(item);
+ case EJsonPathItemType::FilterPredicate:
+ return FilterPredicate(item);
+ case EJsonPathItemType::BinaryLess:
+ case EJsonPathItemType::BinaryLessEqual:
+ case EJsonPathItemType::BinaryGreater:
+ case EJsonPathItemType::BinaryGreaterEqual:
+ case EJsonPathItemType::BinaryEqual:
+ case EJsonPathItemType::BinaryNotEqual:
+ return CompareOp(item);
+ case EJsonPathItemType::AbsMethod:
+ case EJsonPathItemType::FloorMethod:
+ case EJsonPathItemType::CeilingMethod:
+ return NumericMethod(item);
+ case EJsonPathItemType::DoubleMethod:
+ return DoubleMethod(item);
+ case EJsonPathItemType::TypeMethod:
+ return TypeMethod(item);
+ case EJsonPathItemType::SizeMethod:
+ return SizeMethod(item);
+ case EJsonPathItemType::KeyValueMethod:
+ return KeyValueMethod(item);
+ case EJsonPathItemType::StartsWithPredicate:
+ return StartsWithPredicate(item);
+ case EJsonPathItemType::IsUnknownPredicate:
+ return IsUnknownPredicate(item);
+ case EJsonPathItemType::ExistsPredicate:
+ return ExistsPredicate(item);
+ case EJsonPathItemType::LikeRegexPredicate:
+ return LikeRegexPredicate(item);
+ }
+// The context object evaluates to the nodes the executor was given as input.
+TResult TExecutor::ContextObject() {
+ return Input;
+// Looks the variable named by `item` up in the variables map: a known variable
+// evaluates to its single value, an unknown one is an error.
+TResult TExecutor::Variable(const TJsonPathItem& item) {
+ const auto found = Variables.find(item.GetString());
+ if (found != Variables.end()) {
+ return TJsonNodes({found->second});
+ }
+ return MakeError(item, TIssuesIds::JSONPATH_UNDEFINED_VARIABLE, TStringBuilder() << "Undefined variable '" << item.GetString() << "'");
+// Evaluates `last`: the index of the final element of the array whose subscripts are
+// currently being evaluated (the top of ArraySubscriptSource). It is an error to use
+// `last` when no array subscript is being evaluated.
+TResult TExecutor::LastArrayIndex(const TJsonPathItem& item) {
+ if (ArraySubscriptSource.empty()) {
+ return MakeError(item, TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT, "'last' is only allowed inside array subscripts");
+ }
+ const auto& array = ArraySubscriptSource.top();
+ const i64 arraySize = array.GetSize();
+ // NOTE: For empty arrays `last` equals `-1`. This is intended, PostgreSQL 12 has the same behaviour
+ return TJsonNodes({TValue(MakeDouble(static_cast<double>(arraySize - 1)))});
+// A number literal evaluates to a single number node holding the literal's value.
+TResult TExecutor::NumberLiteral(const TJsonPathItem& item) {
+ return TJsonNodes({TValue(MakeDouble(item.GetNumber()))});
+// Looks up the member named by `item` in every object among the input nodes and
+// collects the values found. In strict mode a non-object input or a missing member is
+// an error; otherwise such inputs simply contribute nothing.
+TResult TExecutor::MemberAccess(const TJsonPathItem& item) {
+ const auto input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes members;
+ for (const auto& candidate : OptionalUnwrapArrays(input.GetNodes())) {
+ if (candidate.IsObject()) {
+ if (const auto found = candidate.Lookup(item.GetString())) {
+ members.push_back(*found);
+ } else if (IsStrict()) {
+ return MakeError(item, TIssuesIds::JSONPATH_MEMBER_NOT_FOUND, "Member not found");
+ }
+ } else if (IsStrict()) {
+ return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object");
+ }
+ }
+ return std::move(members);
+// Collects the values of all members of every object among the input nodes.
+// In strict mode a non-object input is an error; otherwise it is skipped.
+TResult TExecutor::WildcardMemberAccess(const TJsonPathItem& item) {
+ const auto input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes values;
+ for (const auto& candidate : OptionalUnwrapArrays(input.GetNodes())) {
+ if (candidate.IsObject()) {
+ TValue memberName;
+ TValue memberValue;
+ auto members = candidate.GetObjectIterator();
+ while (members.Next(memberName, memberValue)) {
+ values.push_back(memberValue);
+ }
+ } else if (IsStrict()) {
+ return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object");
+ }
+ }
+ return std::move(values);
+// Validates the evaluated value of one subscript expression: it must be exactly one
+// number. On success stores the number, rounded down to an integer, in `result` and
+// returns Nothing(); otherwise returns the issue, reported at `pos`.
+TMaybe<TIssue> TExecutor::EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result) {
+ if (index.size() != 1) {
+ return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Expected single number item for array index");
+ }
+ const auto& indexValue = index[0];
+ if (!indexValue.IsNumber()) {
+ return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Array index must be number");
+ }
+ result = static_cast<i64>(std::floor(indexValue.GetNumber()));
+ return Nothing();
+// Evaluates every subscript of an array access item and appends the resulting integer
+// indexes (single index, or from/to pair for ranges) to `result`.
+// Returns the first issue encountered, or Nothing() when all subscripts are valid.
+TMaybe<TIssue> TExecutor::EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result) {
+ for (const auto& subscript : item.GetSubscripts()) {
+ // Lower bound (or the only index when the subscript is not a range)
+ const auto& fromItem = Reader.ReadFromSubscript(subscript);
+ const auto fromResult = Execute(fromItem);
+ if (fromResult.IsError()) {
+ return fromResult.GetError();
+ }
+ i64 fromIndex = 0;
+ TMaybe<TIssue> error = EnsureSingleSubscript(fromItem.Pos, fromResult.GetNodes(), fromIndex);
+ if (error) {
+ return error;
+ }
+ if (!subscript.IsRange()) {
+ result.emplace_back(fromIndex, fromItem.Pos);
+ continue;
+ }
+ // Upper bound of a range subscript
+ const auto& toItem = Reader.ReadToSubscript(subscript);
+ const auto toResult = Execute(toItem);
+ if (toResult.IsError()) {
+ return toResult.GetError();
+ }
+ i64 toIndex = 0;
+ error = EnsureSingleSubscript(toItem.Pos, toResult.GetNodes(), toIndex);
+ if (error) {
+ return error;
+ }
+ result.emplace_back(fromIndex, fromItem.Pos, toIndex, toItem.Pos);
+ }
+ return Nothing();
+// Applies the subscripts of an array access item to every array among the input nodes
+// and collects the selected elements. A non-array input is always an error. Out-of-bounds
+// indexes and inverted ranges are errors in strict mode; otherwise out-of-bounds
+// subscripts are skipped and an inverted range selects nothing.
+TResult TExecutor::ArrayAccess(const TJsonPathItem& item) {
+ const auto input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) {
+ if (!node.IsArray()) {
+ return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array");
+ }
+ // Make the current array visible to `last` while its subscripts are evaluated.
+ // The pop is deferred to the end of this iteration so the stack is unwound on
+ // every exit path, including the early returns below.
+ ArraySubscriptSource.push(node);
+ Y_DEFER {
+ ArraySubscriptSource.pop();
+ };
+ // Check for "hard" errors in array subscripts. These are forbidden even in lax mode
+ // NOTE: We intentionally execute subscripts expressions for each array in the input
+ // because they can contain `last` keyword which value is different for each array
+ TVector<TArraySubscript> subscripts;
+ TMaybe<TIssue> error = EnsureArraySubscripts(item, subscripts);
+ if (error) {
+ return std::move(*error);
+ }
+ const ui64 arraySize = node.GetSize();
+ for (const auto& idx : subscripts) {
+ // Check bounds for first subscript
+ if (idx.GetFrom() < 0 || idx.GetFrom() >= static_cast<i64>(arraySize)) {
+ if (IsStrict()) {
+ return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds");
+ } else {
+ continue;
+ }
+ }
+ // If there is no second subscript, just return the corresponding array element
+ if (!idx.IsRange()) {
+ result.push_back(node.GetElement(idx.GetFrom()));
+ continue;
+ }
+ // Check bounds for second subscript
+ if (idx.GetTo() < 0 || idx.GetTo() >= static_cast<i64>(arraySize)) {
+ if (IsStrict()) {
+ return MakeError(idx.GetToPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds");
+ } else {
+ continue;
+ }
+ }
+ // In strict mode invalid ranges are forbidden
+ if (idx.GetFrom() > idx.GetTo() && IsStrict()) {
+ return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE, "Range lower bound is greater than upper bound");
+ }
+ // In lax mode an inverted range simply produces no elements here
+ for (i64 i = idx.GetFrom(); i <= idx.GetTo(); i++) {
+ result.push_back(node.GetElement(i));
+ }
+ }
+ }
+ return std::move(result);
+// Collects all elements of every array among the input nodes.
+// A non-array input is an error regardless of mode.
+TResult TExecutor::WildcardArrayAccess(const TJsonPathItem& item) {
+ const auto input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) {
+ if (!node.IsArray()) {
+ return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array");
+ }
+ auto it = node.GetArrayIterator();
+ TValue value;
+ while (it.Next(value)) {
+ result.push_back(value);
+ }
+ }
+ return std::move(result);
+// Applies unary plus or minus to every operand node. All operands must be numbers.
+// Unary plus passes the operand through unchanged; unary minus negates it.
+TResult TExecutor::UnaryArithmeticOp(const TJsonPathItem& item) {
+ const auto& operandItem = Reader.ReadInput(item);
+ const auto operandsResult = Execute(operandItem);
+ if (operandsResult.IsError()) {
+ return operandsResult;
+ }
+ const auto& operands = operandsResult.GetNodes();
+ TJsonNodes result;
+ result.reserve(operands.size());
+ for (const auto& operand : operands) {
+ if (!operand.IsNumber()) {
+ // NOTE(review): both MakeError overloads in this file take (position or item,
+ // issue code, message); this call passes only the message. The position and
+ // issue code arguments appear to be missing here - restore them.
+ return MakeError(
+ TStringBuilder() << "Unsupported type for unary operations"
+ );
+ }
+ if (item.Type == EJsonPathItemType::UnaryPlus) {
+ result.push_back(operand);
+ continue;
+ }
+ const auto value = operand.GetNumber();
+ result.push_back(TValue(MakeDouble(-value)));
+ }
+ return std::move(result);
+// Validates one evaluated operand of a binary arithmetic operation: it must be exactly
+// one number. On success stores the number in `result` and returns Nothing();
+// otherwise returns the issue.
+TMaybe<TIssue> TExecutor::EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result) {
+ if (nodes.size() != 1) {
+ return MakeError(pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for binary operation");
+ }
+ const auto& value = nodes[0];
+ if (!value.IsNumber()) {
+ // NOTE(review): both MakeError overloads in this file take (position or item,
+ // issue code, message); this call passes only the message. The position and
+ // issue code arguments appear to be missing here - restore them.
+ return MakeError(
+ TStringBuilder() << "Unsupported type for binary operations"
+ );
+ }
+ result = value.GetNumber();
+ return Nothing();
+// Evaluates a binary arithmetic item (+, -, *, /, %). Both operands must evaluate to
+// exactly one number each; the left operand is evaluated and validated first.
+// Division or modulo by a value that IsZero() considers zero is an error, and so is
+// an infinite result.
+TResult TExecutor::BinaryArithmeticOp(const TJsonPathItem& item) {
+ const auto& leftItem = Reader.ReadLeftOperand(item);
+ const auto leftResult = Execute(leftItem);
+ if (leftResult.IsError()) {
+ return leftResult;
+ }
+ double left = 0;
+ TMaybe<TIssue> error = EnsureBinaryArithmeticOpArgument(leftItem.Pos, leftResult.GetNodes(), left);
+ if (error) {
+ return std::move(*error);
+ }
+ const auto& rightItem = Reader.ReadRightOperand(item);
+ const auto rightResult = Execute(rightItem);
+ if (rightResult.IsError()) {
+ return rightResult;
+ }
+ double right = 0;
+ error = EnsureBinaryArithmeticOpArgument(rightItem.Pos, rightResult.GetNodes(), right);
+ if (error) {
+ return std::move(*error);
+ }
+ double result = 0;
+ switch (item.Type) {
+ case EJsonPathItemType::BinaryAdd:
+ result = left + right;
+ break;
+ case EJsonPathItemType::BinarySubstract:
+ result = left - right;
+ break;
+ case EJsonPathItemType::BinaryMultiply:
+ result = left * right;
+ break;
+ case EJsonPathItemType::BinaryDivide:
+ if (IsZero(right)) {
+ return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero");
+ }
+ result = left / right;
+ break;
+ case EJsonPathItemType::BinaryModulo:
+ if (IsZero(right)) {
+ return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero");
+ }
+ result = std::fmod(left, right);
+ break;
+ default:
+ YQL_ENSURE(false, "Expected binary arithmetic operation");
+ }
+ // Overflow to infinity is reported instead of being returned as a value
+ if (Y_UNLIKELY(std::isinf(result))) {
+ return MakeError(item, TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY, "Binary operation result is infinity");
+ }
+ return TJsonNodes({TValue(MakeDouble(result))});
+// Validates one evaluated operand of a logical operation: it must be exactly one node
+// that is either null or a boolean. On success `result` receives Nothing() for null or
+// the boolean value, and the function returns Nothing(); otherwise it returns the issue.
+TMaybe<TIssue> TExecutor::EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result) {
+ if (nodes.size() != 1) {
+ return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for logical operation");
+ }
+ const auto& operand = nodes[0];
+ if (operand.IsNull()) {
+ result = Nothing();
+ return Nothing();
+ }
+ if (operand.IsBool()) {
+ result = operand.GetBool();
+ return Nothing();
+ }
+ return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Unsupported type for logical operation");
+// Evaluates AND / OR with three-valued logic (true, false, null). Both operands are
+// always evaluated and validated, left first; an undefined TMaybe<bool> stands for null.
+TResult TExecutor::BinaryLogicalOp(const TJsonPathItem& item) {
+ const auto& leftItem = Reader.ReadLeftOperand(item);
+ const auto leftResult = Execute(leftItem);
+ if (leftResult.IsError()) {
+ return leftResult;
+ }
+ TMaybe<bool> left;
+ TMaybe<TIssue> error = EnsureLogicalOpArgument(leftItem.Pos, leftResult.GetNodes(), left);
+ if (error) {
+ return std::move(*error);
+ }
+ const auto& rightItem = Reader.ReadRightOperand(item);
+ const auto rightResult = Execute(rightItem);
+ if (rightResult.IsError()) {
+ return rightResult;
+ }
+ TMaybe<bool> right;
+ error = EnsureLogicalOpArgument(rightItem.Pos, rightResult.GetNodes(), right);
+ if (error) {
+ return std::move(*error);
+ }
+ switch (item.Type) {
+ case EJsonPathItemType::BinaryAnd: {
+ /*
+ AND truth table (taken from SQL JSON standard)
+ | && | true | false | null |
+ | ----- | ----- | ----- | ----- |
+ | true | true | false | null |
+ | false | false | false | false |
+ | null | null | false | null |
+ */
+ if (left.Defined() && right.Defined()) {
+ return TJsonNodes({TValue(MakeBool(*left && *right))});
+ }
+ // At least one side is null: the result is false only if the other side is false
+ const bool falseVsNull = !left.GetOrElse(true) && !right.Defined();
+ const bool nullVsFalse = !right.GetOrElse(true) && !left.Defined();
+ if (falseVsNull || nullVsFalse) {
+ return TJsonNodes({TValue(MakeBool(false))});
+ }
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ case EJsonPathItemType::BinaryOr: {
+ /*
+ OR truth table (taken from SQL JSON standard)
+ | || | true | false | null |
+ | ----- | ----- | ----- | ----- |
+ | true | true | true | true |
+ | false | true | false | null |
+ | null | true | null | null |
+ */
+ if (left.Defined() && right.Defined()) {
+ return TJsonNodes({TValue(MakeBool(*left || *right))});
+ }
+ // At least one side is null: the result is true only if the other side is true
+ const bool trueVsNull = left.GetOrElse(false) && !right.Defined();
+ const bool nullVsTrue = right.GetOrElse(false) && !left.Defined();
+ if (trueVsNull || nullVsTrue) {
+ return TJsonNodes({TValue(MakeBool(true))});
+ }
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ default:
+ YQL_ENSURE(false, "Expected binary logical operation");
+ }
+// Evaluates logical NOT with three-valued logic: null stays null, booleans are inverted.
+TResult TExecutor::UnaryLogicalOp(const TJsonPathItem& item) {
+ /*
+ NOT truth table (taken from SQL JSON standard)
+ | x | !x |
+ | ----- | ----- |
+ | true | false |
+ | false | true |
+ | null | null |
+ */
+ const auto& operandItem = Reader.ReadInput(item);
+ const auto operandResult = Execute(operandItem);
+ if (operandResult.IsError()) {
+ return operandResult;
+ }
+ TMaybe<bool> operand;
+ TMaybe<TIssue> error = EnsureLogicalOpArgument(operandItem.Pos, operandResult.GetNodes(), operand);
+ if (error) {
+ return std::move(*error);
+ }
+ if (!operand.Defined()) {
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ return TJsonNodes({TValue(MakeBool(!(*operand)))});
+// A boolean literal evaluates to a single boolean node holding the literal's value.
+TResult TExecutor::BooleanLiteral(const TJsonPathItem& item) {
+ return TJsonNodes({TValue(MakeBool(item.GetBoolean()))});
+// The null literal evaluates to a single node built from MakeEntity().
+TResult TExecutor::NullLiteral() {
+ return TJsonNodes({TValue(MakeEntity())});
+// A string literal evaluates to a single string node created through ValueBuilder.
+TResult TExecutor::StringLiteral(const TJsonPathItem& item) {
+ return TJsonNodes({TValue(MakeString(item.GetString(), ValueBuilder))});
+// Compares two scalar values with the given comparison operation.
+// Returns Nothing() when the comparison is not defined: either side is an array or
+// object, or the two sides have different non-null types. Otherwise returns the
+// boolean outcome.
+TMaybe<bool> TExecutor::CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation) {
+ if (IsObjectOrArray(left) || IsObjectOrArray(right)) {
+ // Comparisons of objects and arrays are prohibited
+ return Nothing();
+ }
+ if (left.IsNull() && right.IsNull()) {
+ // null == null is true, but all other comparisons are false
+ return operation == EJsonPathItemType::BinaryEqual;
+ }
+ if (left.IsNull() || right.IsNull()) {
+ // All operations between null and non-null are false
+ return false;
+ }
+ // Generic comparison for operand types that support the built-in operators directly
+ auto doCompare = [&operation](const auto& left, const auto& right) {
+ switch (operation) {
+ case EJsonPathItemType::BinaryEqual:
+ return left == right;
+ case EJsonPathItemType::BinaryNotEqual:
+ return left != right;
+ case EJsonPathItemType::BinaryLess:
+ return left < right;
+ case EJsonPathItemType::BinaryLessEqual:
+ return left <= right;
+ case EJsonPathItemType::BinaryGreater:
+ return left > right;
+ case EJsonPathItemType::BinaryGreaterEqual:
+ return left >= right;
+ default:
+ YQL_ENSURE(false, "Expected compare operation");
+ }
+ };
+ if (left.IsBool() && right.IsBool()) {
+ return doCompare(left.GetBool(), right.GetBool());
+ } else if (left.IsString() && right.IsString()) {
+ // NOTE: Strings are compared as byte arrays.
+ // YQL does the same thing for UTF-8 strings and according to SQL/JSON
+ // standard JsonPath must use the same semantics.
+ //
+ // However this is not always correct in the logical sense. The same text can be
+ // encoded as different code point sequences, for example a precomposed letter
+ // versus a base letter followed by a combining mark.
+ // Even though two such strings are different byte sequences, they represent
+ // the same text from the Unicode perspective.
+ return doCompare(left.GetString(), right.GetString());
+ }
+ // Remaining valid case: both sides are numbers. Any other type combination is undefined
+ if (!left.IsNumber() || !right.IsNumber()) {
+ return Nothing();
+ }
+ // Numbers are compared with the EPSILON-tolerant helpers instead of exact operators
+ const auto leftNumber = left.GetNumber();
+ const auto rightNumber = right.GetNumber();
+ switch (operation) {
+ case EJsonPathItemType::BinaryEqual:
+ return IsEqual(leftNumber, rightNumber);
+ case EJsonPathItemType::BinaryNotEqual:
+ return !IsEqual(leftNumber, rightNumber);
+ case EJsonPathItemType::BinaryLess:
+ return IsLess(leftNumber, rightNumber);
+ case EJsonPathItemType::BinaryLessEqual:
+ return !IsGreater(leftNumber, rightNumber);
+ case EJsonPathItemType::BinaryGreater:
+ return IsGreater(leftNumber, rightNumber);
+ case EJsonPathItemType::BinaryGreaterEqual:
+ return !IsLess(leftNumber, rightNumber);
+ default:
+ YQL_ENSURE(false, "Expected compare operation");
+ }
+// Evaluates a comparison predicate over two node sequences. Every left node is compared
+// with every right node: the predicate is true if any pair compares true, and null if
+// any examined pair is not comparable. An error while evaluating either operand also
+// yields null instead of being propagated.
+TResult TExecutor::CompareOp(const TJsonPathItem& item) {
+ const auto& leftItem = Reader.ReadLeftOperand(item);
+ const auto leftResult = Execute(leftItem);
+ if (leftResult.IsError()) {
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ const auto& rightItem = Reader.ReadRightOperand(item);
+ const auto rightResult = Execute(rightItem);
+ if (rightResult.IsError()) {
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ const auto leftNodes = OptionalUnwrapArrays(leftResult.GetNodes());
+ const auto rightNodes = OptionalUnwrapArrays(rightResult.GetNodes());
+ bool error = false;
+ bool found = false;
+ for (const auto& left : leftNodes) {
+ for (const auto& right : rightNodes) {
+ const auto result = CompareValues(left, right, item.Type);
+ if (!result.Defined()) {
+ error = true;
+ } else {
+ found |= *result;
+ }
+ // Lax mode stops at the first decisive pair; strict mode examines all pairs
+ if (IsLax() && (error || found)) {
+ break;
+ }
+ }
+ if (IsLax() && (error || found)) {
+ break;
+ }
+ }
+ // An incomparable pair takes precedence over a match
+ if (error) {
+ return TJsonNodes({TValue(MakeEntity())});
+ }
+ return TJsonNodes({TValue(MakeBool(found))});
+// Evaluates `@`: the node currently being tested by the innermost filter
+// (the top of CurrentFilterObject). Using `@` outside of a filter is an error.
+TResult TExecutor::FilterObject(const TJsonPathItem& item) {
+ if (!CurrentFilterObject.empty()) {
+ return TJsonNodes({CurrentFilterObject.top()});
+ }
+ return MakeError(item, TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER, "'@' is only allowed inside filters");
+// Keeps only those input nodes for which the filter predicate evaluates to exactly one
+// boolean `true`. A predicate that fails, yields several items, or yields anything
+// other than `true` drops the node without raising an error.
+TResult TExecutor::FilterPredicate(const TJsonPathItem& item) {
+ const auto input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ const auto& predicateItem = Reader.ReadFilterPredicate(item);
+ TJsonNodes result;
+ for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
+ // Make the current node visible to `@` while the predicate is evaluated.
+ // The pop is deferred to the end of this iteration so the stack is unwound
+ // on every `continue` below as well.
+ CurrentFilterObject.push(node);
+ Y_DEFER {
+ CurrentFilterObject.pop();
+ };
+ const auto predicateResult = Execute(predicateItem);
+ if (predicateResult.IsError()) {
+ continue;
+ }
+ const auto& predicateNodes = predicateResult.GetNodes();
+ if (predicateNodes.size() != 1) {
+ continue;
+ }
+ const auto& value = predicateNodes[0];
+ if (value.IsBool() && value.GetBool()) {
+ result.push_back(node);
+ continue;
+ }
+ }
+ return std::move(result);
+// Applies abs(), floor() or ceiling() to every input node. All inputs must be numbers.
+TResult TExecutor::NumericMethod(const TJsonPathItem& item) {
+ const auto& input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
+ if (!node.IsNumber()) {
+ return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT, "Unsupported type for numeric method");
+ }
+ double applied = node.GetNumber();
+ switch (item.Type) {
+ case EJsonPathItemType::AbsMethod:
+ applied = std::fabs(applied);
+ break;
+ case EJsonPathItemType::FloorMethod:
+ applied = std::floor(applied);
+ break;
+ case EJsonPathItemType::CeilingMethod:
+ applied = std::ceil(applied);
+ break;
+ default:
+ YQL_ENSURE(false, "Expected numeric method");
+ }
+ result.push_back(TValue(MakeDouble(applied)));
+ }
+ return std::move(result);
+// Implements double(): parses every input string into a number. All inputs must be
+// strings; a string that does not parse, or that parses to infinity, is an error.
+TResult TExecutor::DoubleMethod(const TJsonPathItem& item) {
+ const auto& input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
+ if (!node.IsString()) {
+ return MakeError(item, TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, "Unsupported type for double() method");
+ }
+ const double parsed = ParseDouble(node.GetString());
+ // A NaN result is treated as a parse failure
+ if (std::isnan(parsed)) {
+ return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMBER_STRING, "Error parsing number from string");
+ }
+ if (std::isinf(parsed)) {
+ return MakeError(item, TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING, "Parsed number is infinity");
+ }
+ result.push_back(TValue(MakeDouble(parsed)));
+ }
+ return std::move(result);
+// Implements type(): maps every input node to a string naming its type.
+// Input nodes are used as is (arrays are not unwrapped), so an array reports "array".
+TResult TExecutor::TypeMethod(const TJsonPathItem& item) {
+ const auto& input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ for (const auto& node : input.GetNodes()) {
+ TStringBuf type;
+ switch (node.GetType()) {
+ case EValueType::Null:
+ type = "null";
+ break;
+ case EValueType::Bool:
+ type = "boolean";
+ break;
+ case EValueType::Number:
+ type = "number";
+ break;
+ case EValueType::String:
+ type = "string";
+ break;
+ case EValueType::Array:
+ type = "array";
+ break;
+ case EValueType::Object:
+ type = "object";
+ break;
+ }
+ result.push_back(TValue(MakeString(type, ValueBuilder)));
+ }
+ return std::move(result);
+// Implements size(): an array maps to its element count, any other node maps to 1.
+// Input nodes are used as is (arrays are not unwrapped).
+TResult TExecutor::SizeMethod(const TJsonPathItem& item) {
+ const auto& input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes sizes;
+ for (const auto& node : input.GetNodes()) {
+ const ui64 count = node.IsArray() ? static_cast<ui64>(node.GetSize()) : static_cast<ui64>(1);
+ sizes.push_back(TValue(MakeDouble(static_cast<double>(count))));
+ }
+ return std::move(sizes);
+// Implements keyvalue(): for every member of every input object emits a dict with two
+// entries, "name" (the member key) and "value" (the member value).
+// All inputs must be objects.
+TResult TExecutor::KeyValueMethod(const TJsonPathItem& item) {
+ const auto& input = Execute(Reader.ReadInput(item));
+ if (input.IsError()) {
+ return input;
+ }
+ TJsonNodes result;
+ // Scratch storage for the two dict entries; refilled for every member
+ TPair row[2];
+ TPair& nameEntry = row[0];
+ TPair& valueEntry = row[1];
+ for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
+ if (!node.IsObject()) {
+ return MakeError(item, TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT, "Unsupported type for keyvalue() method");
+ }
+ TValue key;
+ TValue value;
+ auto it = node.GetObjectIterator();
+ while (it.Next(key, value)) {
+ nameEntry.first = MakeString("name", ValueBuilder);
+ nameEntry.second = key.ConvertToUnboxedValue(ValueBuilder);
+ valueEntry.first = MakeString("value", ValueBuilder);
+ valueEntry.second = value.ConvertToUnboxedValue(ValueBuilder);
+ result.push_back(TValue(MakeDict(row, 2)));
+ }
+ }
+ return std::move(result);
+// Implements the `starts with` predicate. The input must be exactly one string
+// node. The result is a single node: true if any prefix node is a prefix of the
+// input, false if none is, or an entity (unknown) if a non-string prefix node
+// was seen. In lax mode the scan stops at the first match or first bad node.
+TResult TExecutor::StartsWithPredicate(const TJsonPathItem& item) {
+    const auto& subject = Execute(Reader.ReadInput(item));
+    if (subject.IsError()) {
+        return subject;
+    }
+    const auto& subjectNodes = subject.GetNodes();
+    if (subjectNodes.size() != 1) {
+        return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Expected exactly 1 item as input argument for starts with predicate");
+    }
+    const auto& haystack = subjectNodes[0];
+    if (!haystack.IsString()) {
+        return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Type of input argument for starts with predicate must be string");
+    }
+    const auto prefixes = Execute(Reader.ReadPrefix(item));
+    if (prefixes.IsError()) {
+        return prefixes;
+    }
+    bool sawNonString = false;
+    bool matched = false;
+    for (const auto& candidate : prefixes.GetNodes()) {
+        if (!candidate.IsString()) {
+            sawNonString = true;
+        } else if (haystack.GetString().StartsWith(candidate.GetString())) {
+            matched = true;
+        }
+        if (IsLax() && (matched || sawNonString)) {
+            break;
+        }
+    }
+    // A non-string prefix makes the whole predicate unknown, even after a match.
+    if (sawNonString) {
+        return TJsonNodes({TValue(MakeEntity())});
+    }
+    return TJsonNodes({TValue(MakeBool(matched))});
+// Implements the `is unknown` predicate: a single null argument gives true,
+// a single bool argument gives false, anything else is an error.
+TResult TExecutor::IsUnknownPredicate(const TJsonPathItem& item) {
+    const auto argument = Execute(Reader.ReadInput(item));
+    if (argument.IsError()) {
+        return argument;
+    }
+    const auto& operands = argument.GetNodes();
+    if (operands.size() != 1) {
+        return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "Expected exactly 1 item as an argument for is unknown predicate");
+    }
+    const auto& operand = operands[0];
+    const bool isUnknown = operand.IsNull();
+    if (!isUnknown && !operand.IsBool()) {
+        return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "is unknown predicate supports only bool and null types for its argument");
+    }
+    return TJsonNodes({TValue(MakeBool(isUnknown))});
+// Implements the `exists` predicate: true when the input expression produced
+// at least one node, false when it produced none. An evaluation error is not
+// propagated; it yields an entity (unknown) node instead.
+TResult TExecutor::ExistsPredicate(const TJsonPathItem& item) {
+    const auto probe = Execute(Reader.ReadInput(item));
+    if (probe.IsError()) {
+        return TJsonNodes({TValue(MakeEntity())});
+    }
+    const bool hasNodes = !probe.GetNodes().empty();
+    return TJsonNodes({TValue(MakeBool(hasNodes))});
+// Implements the `like_regex` predicate over the (optionally unwrapped) input
+// nodes: true if any string node matches the item's regex, false if none does,
+// entity (unknown) if a non-string node was seen. In lax mode the scan stops at
+// the first match or first non-string node.
+TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) {
+    const auto subject = Execute(Reader.ReadInput(item));
+    if (subject.IsError()) {
+        return subject;
+    }
+    const auto& pattern = item.GetRegex();
+    bool sawNonString = false;
+    bool matched = false;
+    for (const auto& candidate : OptionalUnwrapArrays(subject.GetNodes())) {
+        if (!candidate.IsString()) {
+            sawNonString = true;
+        } else if (pattern->Matches(candidate.GetString())) {
+            matched = true;
+        }
+        if (IsLax() && (matched || sawNonString)) {
+            break;
+        }
+    }
+    if (sawNonString) {
+        return TJsonNodes({TValue(MakeEntity())});
+    }
+    return TJsonNodes({TValue(MakeBool(matched))});
+// In strict mode returns the input unchanged. Otherwise returns a copy in which
+// every array node is replaced by its elements (one level only); non-array
+// nodes are kept as they are.
+TJsonNodes TExecutor::OptionalUnwrapArrays(const TJsonNodes& input) {
+    if (IsStrict()) {
+        return input;
+    }
+    TJsonNodes flattened;
+    for (const auto& node : input) {
+        if (node.IsArray()) {
+            auto elements = node.GetArrayIterator();
+            TValue element;
+            while (elements.Next(element)) {
+                flattened.push_back(element);
+            }
+        } else {
+            flattened.push_back(node);
+        }
+    }
+    return flattened;
+// In strict mode returns the input unchanged. Otherwise returns a copy in which
+// every non-array node is wrapped into a single-element list; array nodes are
+// kept as they are.
+TJsonNodes TExecutor::OptionalArrayWrapNodes(const TJsonNodes& input) {
+    if (IsStrict()) {
+        return input;
+    }
+    TJsonNodes wrapped;
+    for (const auto& node : input) {
+        if (!node.IsArray()) {
+            TUnboxedValue single(node.ConvertToUnboxedValue(ValueBuilder));
+            wrapped.push_back(TValue(MakeList(&single, 1, ValueBuilder)));
+        } else {
+            wrapped.push_back(node);
+        }
+    }
+    return wrapped;
diff --git a/yql/essentials/minikql/jsonpath/executor.h b/yql/essentials/minikql/jsonpath/executor.h
new file mode 100644
index 0000000000..9b80a21133
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/executor.h
@@ -0,0 +1,198 @@
+#pragma once
+#include "binary.h"
+#include "value.h"
+#include <yql/essentials/public/issue/yql_issue.h>
+#include <yql/essentials/utils/yql_panic.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_allocator.h>
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <util/generic/ptr.h>
+#include <util/generic/stack.h>
+#include <util/generic/hash.h>
+#include <util/generic/maybe.h>
+#include <variant>
+namespace NYql::NJsonPath {
+using TJsonNodes = TSmallVec<TValue>;
+class TResult { // Outcome of evaluating a JsonPath item: a list of nodes or an issue
+ TResult(TJsonNodes&& nodes); // success, taking the nodes by rvalue
+ TResult(const TJsonNodes& nodes); // success, from a const reference to the nodes
+ TResult(TIssue&& issue); // failure
+ const TJsonNodes& GetNodes() const;
+ TJsonNodes& GetNodes();
+ const TIssue& GetError() const;
+ bool IsError() const;
+ std::variant<TJsonNodes, TIssue> Result; // holds either the nodes or the issue
+class TArraySubscript { // One array subscript: a single index or a `from`..`to` range, with source positions
+ TArraySubscript(i64 from, TPosition fromPos) // single-index form: no `to` part
+ : From(from)
+ , FromPos(fromPos)
+ , HasTo(false)
+ {
+ }
+ TArraySubscript(i64 from, TPosition fromPos, i64 to, TPosition toPos) // range form
+ : From(from)
+ , FromPos(fromPos)
+ , To(to)
+ , ToPos(toPos)
+ , HasTo(true)
+ {
+ }
+ i64 GetFrom() const {
+ return From;
+ }
+ TPosition GetFromPos() const {
+ return FromPos;
+ }
+ i64 GetTo() const {
+ YQL_ENSURE(IsRange()); // `To` is only meaningful for the range form
+ return To;
+ }
+ TPosition GetToPos() const { // not guarded by IsRange(), unlike GetTo()
+ return ToPos;
+ }
+ bool IsRange() const {
+ return HasTo;
+ }
+ i64 From = 0;
+ TPosition FromPos;
+ i64 To = 0;
+ TPosition ToPos;
+ bool HasTo; // set by both constructors
+using TVariablesMap = THashMap<TString, TValue>;
+class TExecutor { // Evaluates a binary JsonPath against input nodes and named variables
+ TExecutor(
+ const TJsonPathPtr path,
+ const TJsonNodes& input,
+ const TVariablesMap& variables,
+ const NUdf::IValueBuilder* valueBuilder);
+ TResult Execute(); // entry point: evaluate the whole path
+ constexpr static double EPSILON = 1e-20;
+ static bool IsZero(double value);
+ static bool IsEqual(double a, double b);
+ static bool IsLess(double a, double b);
+ static bool IsGreater(double a, double b);
+ bool IsStrict() const;
+ bool IsLax() const;
+ TResult Execute(const TJsonPathItem& item); // evaluate a single path item
+ TResult ContextObject();
+ TResult Variable(const TJsonPathItem& item);
+ TResult LastArrayIndex(const TJsonPathItem& item);
+ TResult NumberLiteral(const TJsonPathItem& item);
+ TResult MemberAccess(const TJsonPathItem& item);
+ TResult WildcardMemberAccess(const TJsonPathItem& item);
+ TMaybe<TIssue> EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result);
+ TMaybe<TIssue> EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result);
+ TResult ArrayAccess(const TJsonPathItem& item);
+ TResult WildcardArrayAccess(const TJsonPathItem& item);
+ TResult UnaryArithmeticOp(const TJsonPathItem& item);
+ TMaybe<TIssue> EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result);
+ TResult BinaryArithmeticOp(const TJsonPathItem& item);
+ TMaybe<TIssue> EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result);
+ TResult BinaryLogicalOp(const TJsonPathItem& item);
+ TResult UnaryLogicalOp(const TJsonPathItem& item);
+ TResult BooleanLiteral(const TJsonPathItem& item);
+ TResult NullLiteral();
+ TResult StringLiteral(const TJsonPathItem& item);
+ TMaybe<bool> CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation);
+ TResult CompareOp(const TJsonPathItem& item);
+ TResult FilterObject(const TJsonPathItem& item);
+ TResult FilterPredicate(const TJsonPathItem& item);
+ TResult NumericMethod(const TJsonPathItem& item);
+ TResult DoubleMethod(const TJsonPathItem& item);
+ TResult TypeMethod(const TJsonPathItem& item);
+ TResult SizeMethod(const TJsonPathItem& item);
+ TResult KeyValueMethod(const TJsonPathItem& item);
+ TResult StartsWithPredicate(const TJsonPathItem& item);
+ TResult IsUnknownPredicate(const TJsonPathItem& item);
+ TResult ExistsPredicate(const TJsonPathItem& item);
+ TResult LikeRegexPredicate(const TJsonPathItem& item);
+ TJsonNodes OptionalUnwrapArrays(const TJsonNodes& input); // flattens arrays one level unless strict mode
+ TJsonNodes OptionalArrayWrapNodes(const TJsonNodes& input); // wraps non-arrays into 1-element lists unless strict mode
+ TStack<TValue> ArraySubscriptSource;
+ TStack<TValue> CurrentFilterObject;
+ TJsonPathReader Reader;
+ TJsonNodes Input;
+ const TVariablesMap& Variables; // stored by reference; the map must outlive the executor
+ const NUdf::IValueBuilder* ValueBuilder;
diff --git a/yql/essentials/minikql/jsonpath/jsonpath.cpp b/yql/essentials/minikql/jsonpath/jsonpath.cpp
new file mode 100644
index 0000000000..c48bb08cf1
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/jsonpath.cpp
@@ -0,0 +1,129 @@
+#include "jsonpath.h"
+#include "binary.h"
+#include "ast_builder.h"
+#include "executor.h"
+#include "type_check.h"
+#include "value.h"
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathLexer.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.h>
+#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h>
+#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h>
+#include <google/protobuf/message.h>
+#include <util/string/strip.h>
+#if defined(_tsan_enabled_)
+#include <util/system/mutex.h>
+using namespace NYql;
+using namespace NYql::NUdf;
+using namespace NJson;
+namespace {
+#if defined(_tsan_enabled_)
+TMutex SanitizerJsonPathTranslationMutex;
+class TParseErrorsCollector : public NProtoAST::IErrorCollector { // Forwards parser errors into a TIssues list
+ TParseErrorsCollector(TIssues& issues, size_t maxErrors)
+ : IErrorCollector(maxErrors)
+ , Issues(issues)
+ {
+ }
+ void AddError(ui32 line, ui32 column, const TString& message) override {
+ Issues.AddIssue(TPosition(column, line, "jsonpath"), StripString(message)); // note: TPosition takes column first
+ Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); // tag the issue just added
+ }
+ TIssues& Issues; // destination list; not owned
+namespace NYql::NJsonPath {
+const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { // Parses JsonPath text into an AST; returns null on failure
+ if (!IsUtf(path)) {
+ issues.AddIssue(TPosition(1, 1, "jsonpath"), "JsonPath must be UTF-8 encoded string");
+ issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
+ return {};
+ }
+ google::protobuf::Arena arena; // passed to the builder below
+ const google::protobuf::Message* rawAst = nullptr;
+ {
+ #if defined(_tsan_enabled_)
+ TGuard<TMutex> guard(SanitizerJsonPathTranslationMutex);
+ #endif
+ NProtoAST::TProtoASTBuilder3<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena);
+ TParseErrorsCollector collector(issues, maxParseErrors); // parse errors land in `issues`
+ rawAst = builder.BuildAST(collector);
+ }
+ if (rawAst == nullptr) {
+ return nullptr;
+ }
+ const google::protobuf::Descriptor* descriptor = rawAst->GetDescriptor();
+ if (descriptor && descriptor->name() != "TJsonPathParserAST") { // returns null without adding an issue
+ return nullptr;
+ }
+ const auto* protoAst = static_cast<const NJsonPathGenerated::TJsonPathParserAST*>(rawAst);
+ TAstBuilder astBuilder(issues);
+ TAstNodePtr ast = astBuilder.Build(*protoAst);
+ if (!issues.Empty()) { // also true if the caller passed in a non-empty list
+ return nullptr;
+ }
+ // At this point AST is guaranteed to be valid. We return it even if
+ // type checker finds some logical errors.
+ TJsonPathTypeChecker checker(issues);
+ ast->Accept(checker);
+ return ast;
+// Converts an AST into its binary JsonPath form by running TJsonPathBuilder
+// over it. `ast` is dereferenced unconditionally and must not be null.
+const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast) {
+    TJsonPathBuilder packer;
+    ast->Accept(packer);
+    return packer.ShrinkAndGetResult();
+// Parses JsonPath text and packs it into binary form.
+// Returns an empty pointer when parsing failed or any issue was reported.
+const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors) {
+    const auto ast = ParseJsonPathAst(path, issues, maxParseErrors);
+    // ParseJsonPathAst may return null without adding an issue (no AST built or
+    // unexpected message type), so the pointer is checked as well as the issues.
+    if (!ast || !issues.Empty()) {
+        return {};
+    }
+    return PackBinaryJsonPath(ast);
+// Runs a packed JsonPath against a single JSON value with the given variables
+// and returns the resulting nodes or an error.
+TResult ExecuteJsonPath(
+    const TJsonPathPtr jsonPath,
+    const TValue& json,
+    const TVariablesMap& variables,
+    const NUdf::IValueBuilder* valueBuilder) {
+    // The executor only lives for this call; the input is the one-element list {json}.
+    return TExecutor(jsonPath, {json}, variables, valueBuilder).Execute();
+// Copies every (key, payload) pair of a dict value into a TVariablesMap,
+// using the key's string contents as the variable name.
+TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict) {
+    TVariablesMap result;
+    TUnboxedValue name;
+    TUnboxedValue value;
+    for (auto pairs = dict.GetDictIterator(); pairs.NextPair(name, value);) {
+        result[name.AsStringRef()] = TValue(value);
+    }
+    return result;
diff --git a/yql/essentials/minikql/jsonpath/jsonpath.h b/yql/essentials/minikql/jsonpath/jsonpath.h
new file mode 100644
index 0000000000..455739b005
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/jsonpath.h
@@ -0,0 +1,24 @@
+#pragma once
+#include "executor.h"
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+namespace NYql::NJsonPath {
+// Parses JsonPath text into an AST. Returns a null pointer on failure; parse
+// and type-check problems are appended to `issues`.
+const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors);
+// Packs an AST into binary JsonPath form. This is the overload defined in jsonpath.cpp.
+const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast);
+// NOTE(review): jsonpath.cpp does not define this two-argument overload; kept
+// so existing declarations are not removed - confirm whether it is still needed.
+const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast, TIssues& issues);
+// Parses and packs in one step. Returns an empty pointer if any issue was reported.
+const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors);
+// Converts a dict value into a name -> value variables map.
+TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict);
+// Executes a packed JsonPath against `json` using `variables`.
+TResult ExecuteJsonPath(
+    const TJsonPathPtr jsonPath,
+    const TValue& json,
+    const TVariablesMap& variables,
+    const NUdf::IValueBuilder* valueBuilder);
diff --git a/yql/essentials/minikql/jsonpath/parse_double.cpp b/yql/essentials/minikql/jsonpath/parse_double.cpp
new file mode 100644
index 0000000000..f20476bfe9
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/parse_double.cpp
@@ -0,0 +1,33 @@
+#include "parse_double.h"
+#include <contrib/libs/double-conversion/double-conversion/double-conversion.h>
+#include <cmath>
+namespace NYql::NJsonPath {
+using double_conversion::StringToDoubleConverter;
+double ParseDouble(const TStringBuf literal) { // Parses `literal` with double-conversion's StringToDoubleConverter
+ // FromString<double> from util/string/cast.h is permissive to junk in string.
+ // In our case junk in string means bug in grammar.
+ // See https://a.yandex-team.ru/arc/trunk/arcadia/util/string/cast.cpp?rev=6456750#L692
+ struct TStringToNumberConverter: public StringToDoubleConverter { // converter preconfigured for JsonPath number literals
+ inline TStringToNumberConverter()
+ : StringToDoubleConverter(
+ /* empty_string_value */ 0.0,
+ /* junk_string_value */ NAN,
+ /* infinity_symbol */ nullptr,
+ /* nan_symbol */ nullptr
+ )
+ {
+ }
+ };
+ int parsedCharactersCount = 0; // filled by StringToDouble but not inspected here
+ return Singleton<TStringToNumberConverter>()->StringToDouble(literal.data(), literal.length(), &parsedCharactersCount);
diff --git a/yql/essentials/minikql/jsonpath/parse_double.h b/yql/essentials/minikql/jsonpath/parse_double.h
new file mode 100644
index 0000000000..8481bf7e82
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/parse_double.h
@@ -0,0 +1,10 @@
+#pragma once
+#include <util/generic/string.h>
+namespace NYql::NJsonPath {
+// Parses double literal. Respects exponential format like `-23.5e-10`.
+// On parsing error returns NaN double value (can be checked using `std::isnan`).
+// On double overflow returns INF double value (can be checked using `std::isinf`).
+double ParseDouble(const TStringBuf literal);
+} \ No newline at end of file
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp b/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp
new file mode 100644
index 0000000000..da670f4485
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/dispatcher.cpp
@@ -0,0 +1,67 @@
+#include "registrator.h"
+#include "re.h"
+#include <util/generic/fwd.h>
+#include <util/generic/vector.h>
+#include <util/generic/singleton.h>
+#include <util/generic/yexception.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+namespace NReWrapper {
+namespace NRegistrator {
+struct TLib { // One registered regex backend
+ ui64 Id; // id passed to AddLibrary (1-based; slot index is Id - 1)
+ TCompiler Compiler; // compiles a regex string into an IRe
+ TDeserializer Deserializer; // rebuilds an IRe from a TSerialization message
+using TModules = TVector<TLib>;
+TModules* GetModules() { // Returns the shared backend table (obtained via Singleton<TModules>)
+ return Singleton<TModules>();
+// Registers a regex backend under a 1-based id, growing the table as needed.
+// Slots for ids that were skipped stay value-initialized. Aborts if id is 0.
+void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer) {
+    Y_ABORT_UNLESS(id > 0);
+    auto& registry = *GetModules();
+    if (registry.size() < id) {
+        registry.resize(id);
+    }
+    registry.at(id - 1) = TLib{id, compiler, deserializer};
+namespace NDispatcher {
+// Throws yexception unless `id` refers to a registered backend.
+// Ids are 1-based. A slot can exist only because a backend with a higher id was
+// registered (AddLibrary resizes the table), in which case its callbacks are
+// null and must not be called, so such slots are rejected as well.
+void ThrowOnOutOfRange(ui32 id) {
+    const auto* modules = NRegistrator::GetModules();
+    const bool registered = id != 0
+        && id <= modules->size()
+        && (*modules)[id - 1].Compiler != nullptr
+        && (*modules)[id - 1].Deserializer != nullptr;
+    if (!registered) {
+        ythrow yexception()
+            << "Libs with id: " << id
+            << " was not found. Total added libs: " << modules->size();
+    }
+// Rebuilds a compiled regex from its serialized form.
+// The bytes are first parsed as a TSerialization message; if that fails they
+// are stored as the Hyperscan payload instead. The oneof case of the message
+// selects the backend. Throws yexception if that backend is not registered.
+IRePtr Deserialize(const TStringBuf& serializedRegex) {
+    TSerialization proto;
+    const TString str(serializedRegex);
+    if (!proto.ParseFromString(str)) {
+        proto.SetHyperscan(str);
+    }
+    // The oneof case number is used as the backend id (see REGISTER_RE_LIB users).
+    const ui32 id = static_cast<ui32>(proto.GetDataCase());
+    ThrowOnOutOfRange(id);
+    return NRegistrator::GetModules()->at(id - 1).Deserializer(proto);
+IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id) { // Compiles `regex` with the backend registered under `id`
+ ThrowOnOutOfRange(id); // throws yexception for an unknown id
+ return NRegistrator::GetModules()->at(id - 1).Compiler(regex, flags); // ids are 1-based
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp
new file mode 100644
index 0000000000..2fc490b6f4
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/hyperscan.cpp
@@ -0,0 +1,69 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <util/charset/utf8.h>
+namespace NReWrapper {
+namespace NHyperscan {
+namespace {
+class THyperscan : public IRe { // IRe implementation backed by a Hyperscan database
+ THyperscan(::NHyperscan::TDatabase&& db)
+ : Database(std::move(db))
+ { }
+ bool Matches(const TStringBuf& text) const override {
+ if (!Scratch) { // scratch is created on first use; NOTE(review): no synchronization here - confirm single-threaded use
+ Scratch = ::NHyperscan::MakeScratch(Database);
+ }
+ return ::NHyperscan::Matches(Database, Scratch, text);
+ }
+ TString Serialize() const override {
+ // Compatibility with old versions
+ return ::NHyperscan::Serialize(Database);
+ * TSerialization proto;
+ * proto.SetHyperscan(::NHyperscan::Serialize(Database));
+ * TString data;
+ * auto res = proto.SerializeToString(&data);
+ * Y_ABORT_UNLESS(res);
+ * return data;
+ */
+ }
+ ::NHyperscan::TDatabase Database;
+ mutable ::NHyperscan::TScratch Scratch; // mutable so the const Matches() can initialize it
+IRePtr Compile(const TStringBuf& regex, unsigned int flags) { // Compiles `regex` into a THyperscan; rethrows compile errors as TCompileException
+ unsigned int hyperscanFlags = 0;
+ try {
+ if (UTF8Detect(regex)) { // any non-zero detection result enables UTF-8 mode
+ hyperscanFlags |= HS_FLAG_UTF8;
+ }
+ if (NX86::HaveAVX2()) {
+ hyperscanFlags |= HS_CPU_FEATURES_AVX2; // NOTE(review): a CPU-feature constant OR-ed into pattern flags - confirm this is intended
+ }
+ if (flags & FLAGS_CASELESS) {
+ hyperscanFlags |= HS_FLAG_CASELESS;
+ }
+ return std::make_unique<THyperscan>(::NHyperscan::Compile(regex, hyperscanFlags));
+ } catch (const ::NHyperscan::TCompileException& ex) {
+ ythrow TCompileException() << ex.what(); // translate to the wrapper's exception type
+ }
+IRePtr Deserialize(const TSerialization& proto) { // Rebuilds a THyperscan from the message's Hyperscan payload
+ return std::make_unique<THyperscan>(::NHyperscan::Deserialize(proto.GetHyperscan()));
+REGISTER_RE_LIB(TSerialization::kHyperscan, Compile, Deserialize)
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make
new file mode 100644
index 0000000000..4cc999c064
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/hyperscan/ya.make
@@ -0,0 +1,13 @@
+ library/cpp/regex/hyperscan
+ yql/essentials/minikql/jsonpath/rewrapper
+ GLOBAL hyperscan.cpp
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto b/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto
new file mode 100644
index 0000000000..922ec74b26
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.proto
@@ -0,0 +1,15 @@
+syntax = "proto3";
+package NReWrapper;
+message TRe2Serialization { // Data the RE2 backend stores for a regex
+ string Regexp = 1; // regex source text
+ uint64 Flags = 2; // wrapper flags passed at compile time
+message TSerialization { // Serialized regex; the oneof case selects the backend
+ oneof Data {
+ bytes Hyperscan = 1; // serialized Hyperscan database
+ TRe2Serialization Re2 = 2;
+ }
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make
new file mode 100644
index 0000000000..ca4cf4aae1
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/proto/ya.make
@@ -0,0 +1,9 @@
+ serialization.proto
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re.h b/yql/essentials/minikql/jsonpath/rewrapper/re.h
new file mode 100644
index 0000000000..3f564ad1ad
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re.h
@@ -0,0 +1,31 @@
+#pragma once
+#include <memory>
+#include <util/generic/fwd.h>
+#include <util/generic/yexception.h>
+namespace NReWrapper {
+class TCompileException : public yexception {
+enum EFlags {
+class IRe { // Backend-independent interface of a compiled regex
+ virtual ~IRe() = default;
+ virtual bool Matches(const TStringBuf& text) const = 0; // whether `text` matches the regex
+ virtual TString Serialize() const = 0; // bytes accepted by NDispatcher::Deserialize
+using IRePtr = std::unique_ptr<IRe>;
+namespace NDispatcher { // Entry points that route to a registered backend
+ IRePtr Compile(const TStringBuf& regex, unsigned int flags, ui32 id); // `id` selects the backend
+ IRePtr Deserialize(const TStringBuf& serializedRegex); // backend is chosen from the serialized data
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp b/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp
new file mode 100644
index 0000000000..694472f632
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re2/re2.cpp
@@ -0,0 +1,89 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <contrib/libs/re2/re2/re2.h>
+#include <util/charset/utf8.h>
+namespace NReWrapper {
+using namespace re2;
+namespace NRe2 {
+namespace {
+// Builds RE2 options for a pattern: UTF-8 encoding when UTF8Detect reports
+// UTF8 for the pattern text, Latin-1 otherwise; case sensitivity is switched
+// off when FLAGS_CASELESS is set.
+RE2::Options CreateOptions(const TStringBuf& regex, unsigned int flags) {
+    const bool isUtf8Pattern = (UTF8Detect(regex) == UTF8);
+    const bool caseless = (flags & FLAGS_CASELESS) != 0;
+    RE2::Options result;
+    if (isUtf8Pattern) {
+        result.set_encoding(RE2::Options::Encoding::EncodingUTF8);
+    } else {
+        result.set_encoding(RE2::Options::Encoding::EncodingLatin1);
+    }
+    result.set_case_sensitive(!caseless);
+    return result;
+class TRe2 : public IRe { // IRe implementation backed by RE2
+ TRe2(const TStringBuf& regex, unsigned int flags) // compile from source text
+ : Regexp(StringPiece(regex.data(), regex.size()), CreateOptions(regex, flags))
+ {
+ auto re2 = RawRegexp.MutableRe2(); // remember source and flags for Serialize()
+ re2->set_regexp(TString(regex));
+ re2->set_flags(flags);
+ }
+ TRe2(const TSerialization& proto) // recompile from a serialized message
+ : Regexp(StringPiece(proto.GetRe2().GetRegexp().data(), proto.GetRe2().GetRegexp().size()),
+ CreateOptions(proto.GetRe2().GetRegexp(), proto.GetRe2().GetFlags()))
+ , RawRegexp(proto)
+ { }
+ bool Matches(const TStringBuf& text) const override {
+ const StringPiece piece(text.data(), text.size());
+ RE2::Anchor anchor = RE2::UNANCHORED; // match anywhere in the text
+ return Regexp.Match(piece, 0, text.size(), anchor, nullptr, 0); // no capture groups requested
+ }
+ TString Serialize() const override {
+ TString data;
+ auto res = RawRegexp.SerializeToString(&data); // NOTE(review): `res` is not checked in the visible code
+ return data;
+ }
+ bool Ok(TString* error) const { // true if the pattern compiled; otherwise stores RE2's error text
+ if (Regexp.ok()) {
+ return true;
+ } else {
+ *error = Regexp.error();
+ return false;
+ }
+ }
+ RE2 Regexp;
+ TSerialization RawRegexp; // source text and flags kept for serialization
+IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
+ auto ptr = std::make_unique<TRe2>(regex, flags);
+ TString error;
+ if (!ptr->Ok(&error)) {
+ ythrow TCompileException() << error;
+ }
+ return ptr;
+IRePtr Deserialize(const TSerialization& p) { // Rebuilds a TRe2 from a serialized message; does not call Ok(), unlike Compile
+ return std::make_unique<TRe2>(p);
+REGISTER_RE_LIB(TSerialization::kRe2, Compile, Deserialize)
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make
new file mode 100644
index 0000000000..5520d19414
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/re2/ya.make
@@ -0,0 +1,13 @@
+ contrib/libs/re2
+ yql/essentials/minikql/jsonpath/rewrapper
+ GLOBAL re2.cpp
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/registrator.h b/yql/essentials/minikql/jsonpath/rewrapper/registrator.h
new file mode 100644
index 0000000000..724b529910
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/registrator.h
@@ -0,0 +1,28 @@
+#pragma once
+#include <util/generic/fwd.h>
+#define REGISTER_RE_LIB(...) \
+ namespace { \
+ struct TReWrapperStaticRegistrator { \
+ inline TReWrapperStaticRegistrator() { \
+ NRegistrator::AddLibrary(__VA_ARGS__); \
+ } \
+ }
+namespace NReWrapper {
+class IRe;
+class TSerialization;
+using IRePtr = std::unique_ptr<IRe>;
+namespace NRegistrator { // Backend registration API used by REGISTER_RE_LIB
+using TCompiler = IRePtr(*)(const TStringBuf&, unsigned int); // (regex, flags) -> compiled regex
+using TDeserializer = IRePtr(*)(const TSerialization&); // serialized message -> compiled regex
+void AddLibrary(ui32 id, TCompiler compiler, TDeserializer deserializer); // id must be > 0
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp b/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp
new file mode 100644
index 0000000000..3df53e44b0
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/hyperscan_ut.cpp
@@ -0,0 +1,37 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+namespace NReWrapper {
+namespace NDispatcher {
+Y_UNIT_TEST_SUITE(ReWrapperDispatcherTestHyperscan) { // Dispatcher tests for the Hyperscan backend
+ Y_UNIT_TEST(LegacySerialization) { // raw Hyperscan bytes (not wrapped in TSerialization) must still deserialize
+ unsigned int hyperscanFlags = 0;
+ hyperscanFlags |= HS_FLAG_UTF8;
+ if (NX86::HaveAVX2()) {
+ hyperscanFlags |= HS_CPU_FEATURES_AVX2;
+ }
+ auto database = ::NHyperscan::Compile("[0-9]+", hyperscanFlags);
+ auto string = ::NHyperscan::Serialize(database);
+ auto wrapper = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(wrapper->Matches("abc"), false);
+ }
+ Y_UNIT_TEST(Serialization) { // compile -> serialize -> deserialize keeps matching behaviour
+ auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kHyperscan);
+ auto string = w1->Serialize();
+ auto w2 = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+ }
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp b/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp
new file mode 100644
index 0000000000..0e4d9e2889
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/re2_ut.cpp
@@ -0,0 +1,23 @@
+#include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <library/cpp/testing/unittest/registar.h>
+namespace NReWrapper {
+namespace NDispatcher {
+Y_UNIT_TEST_SUITE(ReWrapperDispatcherRe2) { // Dispatcher tests for the RE2 backend
+ Y_UNIT_TEST(Serialization) { // compile -> serialize -> deserialize keeps matching behaviour
+ auto w1 = Compile("[0-9]+", 0, NReWrapper::TSerialization::kRe2);
+ auto string = w1->Serialize();
+ auto w2 = Deserialize(string);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w1->Matches("abc"), false);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("123"), true);
+ UNIT_ASSERT_VALUES_EQUAL(w2->Matches("abc"), false);
+ }
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make
new file mode 100644
index 0000000000..26f57235a8
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ut/ya.make
@@ -0,0 +1,25 @@
+ hyperscan_ut.cpp
+ re2_ut.cpp
+ )
+ yql/essentials/minikql/jsonpath/rewrapper
+ yql/essentials/minikql/jsonpath/rewrapper/hyperscan
+ yql/essentials/minikql/jsonpath/rewrapper/re2
+ )
+ re2_ut.cpp
+ )
+ yql/essentials/minikql/jsonpath/rewrapper
+ yql/essentials/minikql/jsonpath/rewrapper/re2
+ )
diff --git a/yql/essentials/minikql/jsonpath/rewrapper/ya.make b/yql/essentials/minikql/jsonpath/rewrapper/ya.make
new file mode 100644
index 0000000000..92e7b8e9b9
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/rewrapper/ya.make
@@ -0,0 +1,21 @@
+ yql/essentials/minikql/jsonpath/rewrapper/proto
+ dispatcher.cpp
+ hyperscan
+ proto
+ re2
+ ut
diff --git a/yql/essentials/minikql/jsonpath/type_check.cpp b/yql/essentials/minikql/jsonpath/type_check.cpp
new file mode 100644
index 0000000000..f6ef00c9b2
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/type_check.cpp
@@ -0,0 +1,132 @@
+#include "type_check.h"
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+namespace NYql::NJsonPath {
+TJsonPathTypeChecker::TJsonPathTypeChecker(TIssues& issues) // errors found by the checker are appended to `issues`
+ : Issues(issues)
+void TJsonPathTypeChecker::VisitRoot(const TRootNode& node) { // checks the root expression
+ node.GetExpr()->Accept(*this);
+void TJsonPathTypeChecker::VisitContextObject(const TContextObjectNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitVariable(const TVariableNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitLastArrayIndex(const TLastArrayIndexNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitNumberLiteral(const TNumberLiteralNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitMemberAccess(const TMemberAccessNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+void TJsonPathTypeChecker::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+// Checks the input expression, then both bounds of every subscript
+// (the upper bound only when it is present).
+void TJsonPathTypeChecker::VisitArrayAccess(const TArrayAccessNode& node) {
+    node.GetInput()->Accept(*this);
+    for (const auto& range : node.GetSubscripts()) {
+        range.From->Accept(*this);
+        if (!range.To) {
+            continue;
+        }
+        range.To->Accept(*this);
+    }
+void TJsonPathTypeChecker::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+// Logical NOT requires a boolean operand; other unary operations are not
+// restricted here. The operand is checked recursively in either case.
+void TJsonPathTypeChecker::VisitUnaryOperation(const TUnaryOperationNode& node) {
+    const bool isNot = node.GetOp() == EUnaryOperation::Not;
+    if (isNot && node.GetExpr()->GetReturnType() != EReturnType::Bool) {
+        Error(node.GetExpr(), "Logical not needs boolean argument");
+    }
+    node.GetExpr()->Accept(*this);
+// For AND / OR both operands must be boolean; each offending side gets its own
+// error. Both operands are then checked recursively, left first.
+void TJsonPathTypeChecker::VisitBinaryOperation(const TBinaryOperationNode& node) {
+    const auto operation = node.GetOp();
+    const bool isLogical = operation == EBinaryOperation::And || operation == EBinaryOperation::Or;
+    if (isLogical) {
+        if (node.GetLeftExpr()->GetReturnType() != EReturnType::Bool) {
+            Error(node.GetLeftExpr(), "Left argument of logical operation needs to be boolean");
+        }
+        if (node.GetRightExpr()->GetReturnType() != EReturnType::Bool) {
+            Error(node.GetRightExpr(), "Right argument of logical operation needs to be boolean");
+        }
+    }
+    node.GetLeftExpr()->Accept(*this);
+    node.GetRightExpr()->Accept(*this);
+void TJsonPathTypeChecker::VisitBooleanLiteral(const TBooleanLiteralNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitNullLiteral(const TNullLiteralNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitStringLiteral(const TStringLiteralNode& node) { // nothing to check
+ Y_UNUSED(node);
+void TJsonPathTypeChecker::VisitFilterObject(const TFilterObjectNode& node) { // nothing to check
+ Y_UNUSED(node);
+// Checks the filter input, requires the predicate to be boolean, then checks
+// the predicate itself.
+void TJsonPathTypeChecker::VisitFilterPredicate(const TFilterPredicateNode& node) {
+    node.GetInput()->Accept(*this);
+    const bool predicateIsBool = node.GetPredicate()->GetReturnType() == EReturnType::Bool;
+    if (!predicateIsBool) {
+        Error(node.GetPredicate(), "Filter must return boolean value");
+    }
+    node.GetPredicate()->Accept(*this);
+void TJsonPathTypeChecker::VisitMethodCall(const TMethodCallNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+void TJsonPathTypeChecker::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { // checks input, then prefix
+ node.GetInput()->Accept(*this);
+ node.GetPrefix()->Accept(*this);
+void TJsonPathTypeChecker::VisitExistsPredicate(const TExistsPredicateNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+// `is unknown` requires a boolean input; the input is checked recursively
+// whether or not it has the right type.
+void TJsonPathTypeChecker::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) {
+    const bool inputIsBool = node.GetInput()->GetReturnType() == EReturnType::Bool;
+    if (!inputIsBool) {
+        Error(node.GetInput(), "is unknown predicate expectes boolean argument");
+    }
+    node.GetInput()->Accept(*this);
+void TJsonPathTypeChecker::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { // only the input is checked
+ node.GetInput()->Accept(*this);
+void TJsonPathTypeChecker::Error(const TAstNodePtr node, const TStringBuf message) {
+ Issues.AddIssue(node->GetPos(), message);
+ Issues.back().SetCode(TIssuesIds::JSONPATH_TYPE_CHECK_ERROR, TSeverityIds::S_ERROR);
diff --git a/yql/essentials/minikql/jsonpath/type_check.h b/yql/essentials/minikql/jsonpath/type_check.h
new file mode 100644
index 0000000000..0a02828a6e
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/type_check.h
@@ -0,0 +1,59 @@
+#pragma once
+#include "ast_nodes.h"
+namespace NYql::NJsonPath {
+// AST visitor that type-checks a JsonPath expression tree. Problems found
+// while visiting are appended to a TIssues collection held by reference.
+class TJsonPathTypeChecker : public IAstNodeVisitor {
+ // NOTE(review): presumably binds the `Issues` member to the argument; the
+ // constructor definition is not visible here.
+ TJsonPathTypeChecker(TIssues& Issues);
+ // IAstNodeVisitor overrides, one per AST node kind.
+ void VisitRoot(const TRootNode& node) override;
+ void VisitContextObject(const TContextObjectNode& node) override;
+ void VisitVariable(const TVariableNode& node) override;
+ void VisitLastArrayIndex(const TLastArrayIndexNode& node) override;
+ void VisitNumberLiteral(const TNumberLiteralNode& node) override;
+ void VisitMemberAccess(const TMemberAccessNode& node) override;
+ void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override;
+ void VisitArrayAccess(const TArrayAccessNode& node) override;
+ void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override;
+ void VisitUnaryOperation(const TUnaryOperationNode& node) override;
+ void VisitBinaryOperation(const TBinaryOperationNode& node) override;
+ void VisitBooleanLiteral(const TBooleanLiteralNode& node) override;
+ void VisitNullLiteral(const TNullLiteralNode& node) override;
+ void VisitStringLiteral(const TStringLiteralNode& node) override;
+ void VisitFilterObject(const TFilterObjectNode& node) override;
+ void VisitFilterPredicate(const TFilterPredicateNode& node) override;
+ void VisitMethodCall(const TMethodCallNode& node) override;
+ void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override;
+ void VisitExistsPredicate(const TExistsPredicateNode& node) override;
+ void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override;
+ void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override;
+ // Appends an issue for `node` carrying `message`.
+ void Error(const TAstNodePtr node, const TStringBuf message);
+ // Destination for reported issues; a reference, so not owned by the checker.
+ TIssues& Issues;
+} \ No newline at end of file
diff --git a/yql/essentials/minikql/jsonpath/ut/common_ut.cpp b/yql/essentials/minikql/jsonpath/ut/common_ut.cpp
new file mode 100644
index 0000000000..a32389a768
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/common_ut.cpp
@@ -0,0 +1,972 @@
+#include "test_base.h"
+#include <util/string/builder.h>
+#include <cmath>
+class TJsonPathCommonTest : public TJsonPathTestBase {
+ // Default constructor; only invokes the TJsonPathTestBase default constructor.
+ TJsonPathCommonTest()
+ : TJsonPathTestBase()
+ {
+ }
+ UNIT_TEST_SUITE(TJsonPathCommonTest);
+ UNIT_TEST(TestPrimary);
+ UNIT_TEST(TestMemberAccess);
+ UNIT_TEST(TestWildcardMemberAccess);
+ UNIT_TEST(TestArrayAccess);
+ UNIT_TEST(TestLastArrayIndex);
+ UNIT_TEST(TestLastArrayIndexInvalid);
+ UNIT_TEST(TestNonIntegerArrayIndex);
+ UNIT_TEST(TestWildcardArrayAccess);
+ UNIT_TEST(TestUnaryOperations);
+ UNIT_TEST(TestUnaryOperationsErrors);
+ UNIT_TEST(TestBinaryArithmeticOperations);
+ UNIT_TEST(TestBinaryArithmeticOperationsErrors);
+ UNIT_TEST(TestParseErrors);
+ UNIT_TEST(TestVariables);
+ UNIT_TEST(TestDivisionByZero);
+ UNIT_TEST(TestInfinityResult);
+ UNIT_TEST(TestLogicalOperations);
+ UNIT_TEST(TestCompareOperations);
+ UNIT_TEST(TestFilter);
+ UNIT_TEST(TestFilterInvalid);
+ UNIT_TEST(TestNumericMethods);
+ UNIT_TEST(TestNumericMethodsErrors);
+ UNIT_TEST(TestDoubleMethod);
+ UNIT_TEST(TestDoubleMethodErrors);
+ UNIT_TEST(TestTypeMethod);
+ UNIT_TEST(TestSizeMethod);
+ UNIT_TEST(TestKeyValueMethod);
+ UNIT_TEST(TestKeyValueMethodErrors);
+ UNIT_TEST(TestStartsWithPredicate);
+ UNIT_TEST(TestStartsWithPredicateErrors);
+ UNIT_TEST(TestExistsPredicate);
+ UNIT_TEST(TestIsUnknownPredicate);
+ UNIT_TEST(TestLikeRegexPredicate);
+ // Checks primary expressions: the context object `$` and number, boolean,
+ // null and string literals. Every case is run once per entry of ALL_MODES,
+ // with the mode text prepended to the path.
+ void TestPrimary() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ // Context object $ must return whole JSON when used alone
+ {R"({"key": 123})", "$", {R"({"key":123})"}},
+ {R"([1, 2, 3])", "$", {R"([1,2,3])"}},
+ {"1.234", "$", {"1.234"}},
+ {R"("some string")", "$", {R"("some string")"}},
+ // Literal must not depend on input
+ {R"({"key": 123})", "123", {"123"}},
+ {R"([1, 2, 3])", "123", {"123"}},
+ {"1.234", "123", {"123"}},
+ {R"("some string")", "123", {"123"}},
+ // Check various ways to define number literal
+ {"1", "123.4", {"123.4"}},
+ {"1", "0.567", {"0.567"}},
+ {"1", "1234e-1", {"123.4"}},
+ {"1", "567e-3", {"0.567"}},
+ {"1", "123.4e-1", {"12.34"}},
+ {"1", "123e3", {"123000"}},
+ {"1", "123e+3", {"123000"}},
+ {"1", "1.23e+1", {"12.3"}},
+ {"1", "1.23e1", {"12.3"}},
+ {"1", "12e0", {"12"}},
+ {"1", "12.3e0", {"12.3"}},
+ {"1", "0", {"0"}},
+ {"1", "0.0", {"0"}},
+ {"1", "0.0e0", {"0"}},
+ // Check boolean and null literals
+ {"1", "null", {"null"}},
+ {"1", "false", {"false"}},
+ {"1", "true", {"true"}},
+ // Check string literals
+ {"1", "\"string\"", {"\"string\""}},
+ {"1", "\" space another space \"", {"\" space another space \""}},
+ {"1", "\"привет\"", {"\"привет\""}},
+ // NOTE: escaping is added by library/cpp/json
+ {"1", "\"\r\n\t\"", {"\"\\r\\n\\t\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks member access by key: bare identifiers, single- and double-quoted
+ // keys (including escaped quotes, spaces and non-ASCII text), surrounding
+ // whitespace, and chained access. Runs under every mode in ALL_MODES.
+ void TestMemberAccess() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"({"key": 123, "another_key": 456})", "$.key", {"123"}},
+ {R"({"key": 123, "_another_28_key_$_": 456})", "$._another_28_key_$_", {"456"}},
+ {R"({"key": 123, "another_key": 456})", " $.another_key ", {"456"}},
+ {R"({"key": 123, "another_key": 456})", "$.key", {"123"}},
+ {R"({"k\"ey": 123, "another_key": 456})", "$.\"k\\\"ey\"", {"123"}},
+ {R"({"k\"ey": 123, "another_key": 456})", "$.'k\\\"ey'", {"123"}},
+ {R"({"key": 123, "another_key": 456})", "$.'key'", {"123"}},
+ {R"({"key": 123, "_another_28_key_$_": 456})", "$.'_another_28_key_$_'", {"456"}},
+ {R"({"key": 123, "another_key": 456})", " $.'another_key' ", {"456"}},
+ {R"({"key": 123, "another_key": 456})", "$.\"key\"", {"123"}},
+ {R"({"key": 123, "_another_28_key_$_": 456})", "$.\"_another_28_key_$_\"", {"456"}},
+ {R"({"key": 123, "another_key": 456})", " $.\"another_key\" ", {"456"}},
+ {R"({"key": 123, "another key": 456})", "$.'another key'", {"456"}},
+ {R"({"key": 123, "another key": 456})", "$.\"another key\"", {"456"}},
+ {R"({"key": 123, "прием отбой": 456})", "$.'прием отбой'", {"456"}},
+ {R"({"key": 123, "прием отбой": 456})", "$.\"прием отбой\"", {"456"}},
+ {R"({"key": {"another": 456}})", "$.key.another", {"456"}},
+ {R"({"key": {"another key": 456}})", "$.'key'.\"another key\"", {"456"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks wildcard member access (`.*`), alone and chained, including on an
+ // empty object. Runs under every mode in ALL_MODES.
+ void TestWildcardMemberAccess() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"({
+ "first": 12,
+ "second": 72
+ })", "$.*", {"12", "72"}},
+ {R"({
+ "friends": {
+ "Nik": {"age": 18},
+ "Kate": {"age": 72}
+ }
+ })", "$.friends.*.age", {"72", "18"}},
+ {R"({
+ "friends": {
+ "Nik": {"age": 18},
+ "Kate": {"age": 72}
+ }
+ })", "$.*.*.*", {"72", "18"}},
+ {R"({})", "$.*.key", {}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks array subscripts: single indices, `from to to` ranges, comma
+ // separated lists of both, and bounds computed from other paths in the
+ // document. Runs under every mode in ALL_MODES.
+ void TestArrayAccess() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"([1, 2, 3])", "$[0]", {"1"}},
+ {R"([1, 2, 3, 4, 5, 6])", "$[0 to 2]", {"1", "2", "3"}},
+ {R"([1, 2, 3, 4, 5, 6])", "$[5, 0 to 2, 0, 0, 3 to 5, 2]", {"6", "1", "2", "3", "1", "1", "4", "5", "6", "3"}},
+ {R"({
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[1 to 3, 0].age", {"72", "50", "60", "18"}},
+ {R"({
+ "range": {
+ "from": 1,
+ "to": 2
+ },
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[$.range.from to $.range.to].age", {"72", "50"}},
+ {R"({
+ "range": {
+ "from": [1, 3, 4],
+ "to": {"key1": 1, "key2": 2, "key3": 3}
+ },
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[$.range.from[1] to $.range.to.key3].age", {"60"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks the `last` keyword inside array subscripts: alone, as a range
+ // bound, nested inside subscript expressions, and combined with
+ // non-integer bounds taken from the document (the last case expects the
+ // same output as its integer counterpart). Runs under every mode in ALL_MODES.
+ void TestLastArrayIndex() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"([1, 2, 3])", "$[last]", {"3"}},
+ {R"([1, 2, 3])", "$[1 to last]", {"2", "3"}},
+ {R"([1, 2, 3])", "$[last to last]", {"3"}},
+ {R"([1, 2, 3, 5, 6])", "$[1, last, last, 0, 2 to last, 3]", {"2", "6", "6", "1", "3", "5", "6", "5"}},
+ {R"([
+ [1, 2, 3, 4],
+ [5, 6, 7, 8]
+ ])", "$[*][last]", {"4", "8"}},
+ {R"({
+ "ranges": [
+ {"from": 1, "to": 3},
+ {"from": 0, "to": 1}
+ ],
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}},
+ {R"({
+ "ranges": [
+ {"from": 1.23, "to": 3.75},
+ {"from": 0.58, "to": 1.00001}
+ ],
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Intended to check runtime errors for invalid uses of `last`.
+ // NOTE(review): the case table below is empty, so the loop body never runs
+ // and this test currently asserts nothing — confirm whether cases are missing.
+ void TestLastArrayIndexInvalid() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks that subscript bounds built from wildcard paths are rejected with
+ // JSONPATH_INVALID_ARRAY_INDEX. Runs under every mode in ALL_MODES.
+ void TestNonIntegerArrayIndex() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {R"({
+ "range": {
+ "from": [1, 3, 4],
+ "to": {"key1": 1, "key2": 2, "key3": 3}
+ },
+ "friends": [1, 2, 3]
+ })", "$.friends[$.range.from[*] to $.range.to.*]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks wildcard array access (`[*]`): on a flat array, on nested arrays,
+ // and followed by member access. Runs under every mode in ALL_MODES.
+ void TestWildcardArrayAccess() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"([1, 2, 3])", "$[*]", {"1", "2", "3"}},
+ {R"([[1], [2], [3, 4, 5]])", "$[*][*]", {"1", "2", "3", "4", "5"}},
+ {R"({
+ "friends": [
+ {"name": "Nik", "age": 18},
+ {"name": "Kate", "age": 72},
+ {"name": "Foma", "age": 50},
+ {"name": "Jora", "age": 60}
+ ]
+ })", "$.friends[*].age", {"18", "72", "50", "60"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks unary plus and minus on literals, on the context object, inside
+ // array subscript bounds, applied to every element of a sequence, and on
+ // a number too large for an integer. Runs under every mode in ALL_MODES.
+ void TestUnaryOperations() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"([])", "-3", {"-3"}},
+ {R"([])", "+3", {"3"}},
+ {R"(-1)", "-$", {"1"}},
+ {R"(-1)", "+$", {"-1"}},
+ {R"({
+ "range": {
+ "from": -1,
+ "to": -2
+ },
+ "array": [1, 2, 3, 4]
+ })", "$.array[-$.range.from to -$.range.to]", {"2", "3"}},
+ {R"({
+ "range": {
+ "from": 1,
+ "to": -2
+ },
+ "array": [1, 2, 3, 4]
+ })", "$.array[+$.range.from to -$.range.to]", {"2", "3"}},
+ {R"({
+ "range": {
+ "from": -1,
+ "to": 2
+ },
+ "array": [1, 2, 3, 4]
+ })", "$.array[-$.range.from to +$.range.to]", {"2", "3"}},
+ {R"({
+ "range": {
+ "from": 1,
+ "to": 2
+ },
+ "array": [1, 2, 3, 4]
+ })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}},
+ {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}},
+ {"30000000000000000000000000", "-$", {"-3e+25"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that unary minus over a sequence containing an array or an object
+ // fails with JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE. Runs under
+ // every mode in ALL_MODES.
+ void TestUnaryOperationsErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {R"([1, 2, [], 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)},
+ {R"([1, 2, {}, 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks binary arithmetic (+, -, *, /, %): operator precedence,
+ // parentheses, fractional operands, interaction with unary signs, and
+ // arithmetic on `last` inside subscripts. Runs under every mode in ALL_MODES.
+ void TestBinaryArithmeticOperations() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"[]", "1 + 2", {"3"}},
+ {"[]", "1 - 2", {"-1"}},
+ {"[]", "10 * 5", {"50"}},
+ {"[]", "10 / 5", {"2"}},
+ {"[]", "13 % 5", {"3"}},
+ {"[]", "20 * 2 + 5", {"45"}},
+ {"[]", "20 / 2 + 5", {"15"}},
+ {"[]", "20 % 2 + 5", {"5"}},
+ {"[]", "20 * (2 + 5)", {"140"}},
+ {"[]", "20 / (2 + 3)", {"4"}},
+ {"[]", "20 % (2 + 5)", {"6"}},
+ {"[]", "5 / 2", {"2.5"}},
+ {"[5.24 , 2.62]", "$[0] / $[1]", {"2"}},
+ {"[5.24, 2.62]", "$[0] % $[1]", {"0"}},
+ {"[3.753, 2.35]", "$[0] % $[1]", {"1.403"}},
+ {"[]", "- 1 + 1", {"0"}},
+ {"[]", "+ 1 + 1", {"2"}},
+ {"[1, 2, 3, 4]", "$[last, last-1, last-2, last-3]", {"4", "3", "2", "1"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that a binary operation with a multi-element sequence on either
+ // side fails with JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT. Runs under
+ // every mode in ALL_MODES.
+ void TestBinaryArithmeticOperationsErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {"[1, 2, 3]", "$[*] + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)},
+ {"[1, 2, 3]", "1 + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)},
+ {"[1, 2, 3]", "$[*] + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Feeds a list of malformed or ill-typed queries to RunParseErrorTestCase.
+ // Unlike most tests here the queries are passed as written: the mode
+ // keyword, where present, is part of the string and no ALL_MODES prefix
+ // is added.
+ void TestParseErrors() {
+ const TVector<TString> testCases = {
+ "strict",
+ "strict smth.key",
+ "strict $.",
+ "strict $.$key",
+ "strict $.28key",
+ "strict $.ke^y",
+ "strict $.привет",
+ "strict $._пока_28_ключ_$_",
+ " strict $.пока ",
+ "lax",
+ "lax smth.key",
+ "lax $.",
+ "lax $.$key",
+ "lax $.28key",
+ "lax $.ke^y",
+ "lax $.привет",
+ "lax $._пока_28_ключ_$_",
+ " lax $.пока ",
+ "12.",
+ "12..3",
+ "12.3e",
+ "12.3e++1",
+ "12.3e--1",
+ "1e100000000000000000000000000000000",
+ "true || false",
+ "1 && (true == true)",
+ "!true",
+ "$[*] ? (@.active) . id",
+ "!(1 > 2).type()",
+ "(null) is unknown",
+ "(12 * 12) is unknown",
+ R"($ like_regex "[[[")",
+ R"($ like_regex "[0-9]+" flag "x")",
+ "$.first fjrfrfq fqijrhfqiwrjhfqrf qrfqr",
+ };
+ for (const auto& testCase : testCases) {
+ RunParseErrorTestCase(testCase);
+ }
+ }
+ // Checks `$name` variables: use in arithmetic, use as a path root, and
+ // variables whose names coincide with the keywords `to` and `strict`.
+ // Runs under every mode in ALL_MODES.
+ void TestVariables() {
+ TVector<TVariablesTestCase> testCases = {
+ {"123", {{"var", "456"}}, "$ + $var", {"579"}},
+ {"123", {{"var", "456"}}, "$var", {"456"}},
+ {"123", {{"var", R"({"key": [1, 2, 3, 4, 5]})"}}, "$var.key[2 to last]", {"3", "4", "5"}},
+ {"123", {{"to", "1"}, {"strict", "2"}}, "$to + $strict", {"3"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunVariablesTestCase(testCase.Json, testCase.Variables, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that dividing by zero, and by a very small non-zero value
+ // (1e-20), both fail with JSONPATH_DIVISION_BY_ZERO. Runs under every
+ // mode in ALL_MODES.
+ void TestDivisionByZero() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {"0", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)},
+ {"0.00000000000000000001", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks that converting a numeric string too large for a double fails
+ // with JSONPATH_INFINITE_NUMBER_STRING. Runs under every mode in ALL_MODES.
+ void TestInfinityResult() {
+ const double step = 1000000000;
+ double current = step;
+ TStringBuilder literal;
+ TStringBuilder query;
+ literal << '"' << step;
+ query << step;
+ // Keep multiplying by 1e9 until the running product overflows to infinity.
+ // Each round appends nine zeros to the quoted literal and one more
+ // " * step" factor to the query text.
+ while (!std::isinf(current)) {
+ query << " * " << step;
+ literal << "000000000";
+ current *= step;
+ }
+ literal << '"';
+ // NOTE(review): `query` is assembled above but no case below uses it —
+ // presumably a case evaluating the overflowing product is missing; confirm.
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {TString(literal), "$.double()", C(TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks the three-valued truth tables of ||, && and !, plus operator
+ // precedence and chaining. Runs under every mode in ALL_MODES.
+ void TestLogicalOperations() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ // JsonPath does not allow to use boolean literals in boolean operators.
+ // Here we use their replacements:
+ // 1. "(1 < true)" for "null"
+ // 2. "(true == true)" for "true"
+ // 3. "(true != true)" for "false"
+ {"1", "(1 < true) || (1 < true)", {"null"}},
+ {"1", "(1 < true) || (true != true)", {"null"}},
+ {"1", "(1 < true) || (true == true)", {"true"}},
+ {"1", "(true != true) || (1 < true)", {"null"}},
+ {"1", "(true != true) || (true != true)", {"false"}},
+ {"1", "(true != true) || (true == true)", {"true"}},
+ {"1", "(true == true) || (1 < true)", {"true"}},
+ {"1", "(true == true) || (true != true)", {"true"}},
+ {"1", "(true == true) || (true == true)", {"true"}},
+ {"1", "(1 < true) && (1 < true)", {"null"}},
+ {"1", "(1 < true) && (true != true)", {"false"}},
+ {"1", "(1 < true) && (true == true)", {"null"}},
+ {"1", "(true != true) && (1 < true)", {"false"}},
+ {"1", "(true != true) && (true != true)", {"false"}},
+ {"1", "(true != true) && (true == true)", {"false"}},
+ {"1", "(true == true) && (1 < true)", {"null"}},
+ {"1", "(true == true) && (true != true)", {"false"}},
+ {"1", "(true == true) && (true == true)", {"true"}},
+ {"1", "(true != true) && (true != true) || (true == true)", {"true"}},
+ {"1", "(true != true) && ((true != true) || (true == true))", {"false"}},
+ {"1", "(true != true) || (true != true) || (true == true)", {"true"}},
+ {"1", "(true == true) && (true == true) && (true == true) && (true != true)", {"false"}},
+ {"1", "!(1 < true)", {"null"}},
+ {"1", "!(true != true)", {"true"}},
+ {"1", "!(true == true)", {"false"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks comparison operators. The three generated loops at the top run
+ // each operator against null and non-scalar operands without a mode
+ // prefix; the table that follows is run under every mode in ALL_MODES.
+ void TestCompareOperations() {
+ const TVector<TString> operations = {"==", "<", "<=", ">", ">=", "!=", "<>"};
+ // All compare operations between null and non-null operands are false
+ for (const auto& op : operations) {
+ RunTestCase("1", TStringBuilder() << "null " << op << " 1", {"false"});
+ RunTestCase("1", TStringBuilder() << "1 " << op << " null", {"false"});
+ }
+ // If one of the operands is not scalar, comparison results to null
+ for (const auto& op : operations) {
+ RunTestCase("[[]]", TStringBuilder() << "$ " << op << " 1", {"null"});
+ RunTestCase("[[]]", TStringBuilder() << "1 " << op << " $", {"null"});
+ RunTestCase("[[]]", TStringBuilder() << "$ " << op << " $", {"null"});
+ RunTestCase("{}", TStringBuilder() << "$ " << op << " 1", {"null"});
+ RunTestCase("{}", TStringBuilder() << "1 " << op << " $", {"null"});
+ RunTestCase("{}", TStringBuilder() << "$ " << op << " $", {"null"});
+ }
+ // If both operands are null, only == is true
+ for (const auto& op : operations) {
+ const TString result = op == "==" ? "true" : "false";
+ RunTestCase("1", TStringBuilder() << "null " << op << " null", {result});
+ }
+ const TVector<TMultiOutputTestCase> testCases = {
+ // Check comparison of numbers
+ {"1", "1.23 < 4.56", {"true"}},
+ {"1", "1.23 > 4.56", {"false"}},
+ {"1", "1.23 <= 4.56", {"true"}},
+ {"1", "1.23 >= 4.56", {"false"}},
+ {"1", "1.23 == 1.23", {"true"}},
+ {"1", "1.23 != 1.23", {"false"}},
+ {"1", "1.23 <> 4.56", {"true"}},
+ {"1", "1.00000000000000000001 == 1.00000000000000000002", {"true"}},
+ // Check numbers of different kinds (int64 vs double)
+ {"1", "1 < 2.33", {"true"}},
+ {"1", "1 > 4.56", {"false"}},
+ {"1", "1 <= 4.56", {"true"}},
+ {"1", "1 >= 4.56", {"false"}},
+ {"1", "1 == 1.23", {"false"}},
+ {"1", "1 != 1.23", {"true"}},
+ {"1", "1 <> 4.56", {"true"}},
+ // Check comparison of strings
+ {"1", R"("abc" < "def")", {"true"}},
+ {"1", R"("abc" > "def")", {"false"}},
+ {"1", R"("abc" <= "def")", {"true"}},
+ {"1", R"("abc" >= "def")", {"false"}},
+ {"1", R"("abc" == "abc")", {"true"}},
+ {"1", R"("abc" != "abc")", {"false"}},
+ {"1", R"("abc" <> "def")", {"true"}},
+ // Check comparison of UTF8 strings
+ // First string is U+00e9 (LATIN SMALL LETTER E WITH ACUTE), "é"
+ // Second string is U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), "é"
+ {"1", R"("é" < "é")", {"false"}},
+ {"1", R"("é" > "é")", {"true"}},
+ {"1", R"("привет" == "привет")", {"true"}},
+ // Check cross-product comparison
+ {R"({
+ "left": [1],
+ "right": [4, 5, 6]
+ })", "$.left[*] < $.right[*]", {"true"}},
+ {R"({
+ "left": [4, 5, 6],
+ "right": [1]
+ })", "$.left[*] < $.right[*]", {"false"}},
+ {R"({
+ "left": [1, 2, 3],
+ "right": [4, 5, 6]
+ })", "$.left[*] < $.right[*]", {"true"}},
+ {R"({
+ "left": [10, 30, 40],
+ "right": [1, 2, 15]
+ })", "$.left[*] < $.right[*]", {"true"}},
+ {R"({
+ "left": [10, 30, 40],
+ "right": [1, 2, 3]
+ })", "$.left[*] < $.right[*]", {"false"}},
+ // Check incomparable types
+ {"1", "1 < true", {"null"}},
+ {"1", R"(true <> "def")", {"null"}},
+ // Check error in arguments
+ {R"({
+ "array": [1, 2, 3, 4, 5],
+ "invalid_index": {
+ "key": 1
+ }
+ })", "$.array[$.invalid_index] < 3", {"null"}},
+ {R"({
+ "array": [1, 2, 3, 4, 5],
+ "invalid_index": {
+ "key": 1
+ }
+ })", "5 >= $.array[$.invalid_index]", {"null"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks filter expressions (`? (...)`): a single predicate, chained
+ // filters, a filter followed by member access, and predicates combined
+ // with && and ||. Runs under every mode in ALL_MODES.
+ void TestFilter() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"[1, 2, 3]", "$[*] ? (@ > 2)", {"3"}},
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$[*] ? (@.age >= 18)", {R"({"age":18})", R"({"age":25})", R"({"age":50})"}},
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$[*] ? (@.age >= 18) ? (@.age <= 30)", {R"({"age":18})", R"({"age":25})"}},
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$[*] ? (@.age >= 18) ? (@.age <= 30) . age", {"18", "25"}},
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$[*] ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}},
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$[*] ? (@.age >= 18 || @.age <= 30) . age", {"18", "25", "50", "5"}},
+ {R"([
+ {
+ "id": 1,
+ "is_valid": false,
+ "days_till_doom": 11,
+ "age_estimation": 4
+ },
+ {
+ "id": 2,
+ "is_valid": true,
+ "days_till_doom": 5,
+ "age_estimation": 3
+ },
+ {
+ "id": 3,
+ "is_valid": true,
+ "days_till_doom": 20,
+ "age_estimation": 10
+ },
+ {
+ "id": 4,
+ "is_valid": true,
+ "days_till_doom": 30,
+ "age_estimation": 2
+ }
+ ])", "$[*] ? (@.is_valid == true && @.days_till_doom > 10 && 2 * @.age_estimation <= 12).id", {"4"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Intended to check runtime errors for invalid filter usage.
+ // NOTE(review): the case table below is empty, so the loop body never runs
+ // and this test currently asserts nothing — confirm whether cases are missing.
+ void TestFilterInvalid() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks the numeric item methods abs(), floor() and ceiling(), applied
+ // per element and chained. Runs under every mode in ALL_MODES.
+ void TestNumericMethods() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"[-1.23, 4.56, 3, 0]", "$[*].abs()", {"1.23", "4.56", "3", "0"}},
+ {"[-1.23, 4.56, 3, 0]", "$[*].floor()", {"-2", "4", "3", "0"}},
+ {"[-1.23, 4.56, 3, 0]", "$[*].ceiling()", {"-1", "5", "3", "0"}},
+ {"-123.45", "$.ceiling().abs().floor()", {"123"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that abs(), floor() and ceiling() on non-numeric items fail with
+ // JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT. Runs under every mode in ALL_MODES.
+ void TestNumericMethodsErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {R"(["1", true, null])", "$[*].abs()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)},
+ {R"(["1", true, null])", "$[*].floor()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)},
+ {R"(["1", true, null])", "$[*].ceiling()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks double() on strings holding numbers in the same notations that
+ // TestPrimary uses for literals, and chained with other numeric methods.
+ // Runs under every mode in ALL_MODES.
+ void TestDoubleMethod() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"([
+ "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1",
+ "123e3", "123e+3", "1.23e+1", "1.23e1",
+ "12e0", "12.3e0", "0", "0.0", "0.0e0"
+ ])", "$[*].double()", {
+ "123", "123.4", "0.567", "123.4", "0.567", "12.34",
+ "123000", "123000", "12.3", "12.3",
+ "12", "12.3", "0", "0", "0",
+ }},
+ {R"("-123.45e1")", "$.double().abs().floor()", {"1234"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks double() failures: a sequence containing non-string items gives
+ // JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, and a string that is not a
+ // number gives JSONPATH_INVALID_NUMBER_STRING. Runs under every mode in
+ // ALL_MODES.
+ void TestDoubleMethodErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {R"(["1", true, null])", "$[*].double()", C(TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT)},
+ {R"("hi stranger")", "$.double()", C(TIssuesIds::JSONPATH_INVALID_NUMBER_STRING)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks type() for every JSON value kind: null, boolean, number,
+ // string, array and object. Runs under every mode in ALL_MODES.
+ void TestTypeMethod() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"null", "$.type()", {"\"null\""}},
+ {"true", "$.type()", {"\"boolean\""}},
+ {"false", "$.type()", {"\"boolean\""}},
+ {"1", "$.type()", {"\"number\""}},
+ {"-1", "$.type()", {"\"number\""}},
+ {"4.56", "$.type()", {"\"number\""}},
+ {"-4.56", "$.type()", {"\"number\""}},
+ {"\"some string\"", "$.type()", {"\"string\""}},
+ {"[]", "$.type()", {"\"array\""}},
+ {"[1, 2, 3, 4]", "$.type()", {"\"array\""}},
+ {"{}", "$.type()", {"\"object\""}},
+ {"{\"key\": 123}", "$.type()", {"\"object\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks size(): arrays report their element count, while scalars and
+ // objects (empty or not) report 1. Runs under every mode in ALL_MODES.
+ void TestSizeMethod() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"null", "$.size()", {"1"}},
+ {"true", "$.size()", {"1"}},
+ {"false", "$.size()", {"1"}},
+ {"1", "$.size()", {"1"}},
+ {"-1", "$.size()", {"1"}},
+ {"4.56", "$.size()", {"1"}},
+ {"-4.56", "$.size()", {"1"}},
+ {"\"some string\"", "$.size()", {"1"}},
+ {"[]", "$.size()", {"0"}},
+ {"[1, 2, 3, 4]", "$.size()", {"4"}},
+ {"{}", "$.size()", {"1"}},
+ {"{\"key\": 123}", "$.size()", {"1"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks keyvalue(): each object member becomes a {"name", "value"}
+ // object (expected here in the order one, three, two), and the result can
+ // be filtered further. Runs under every mode in ALL_MODES.
+ void TestKeyValueMethod() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"({
+ "one": 1,
+ "two": 2,
+ "three": 3
+ })", "$.keyvalue()", {
+ R"({"name":"one","value":1})",
+ R"({"name":"three","value":3})",
+ R"({"name":"two","value":2})",
+ }},
+ {R"({
+ "one": "string",
+ "two": [1, 2, 3, 4],
+ "three": [4, 5]
+ })", R"($.keyvalue() ? (@.value.type() == "array" && @.value.size() > 2).name)", {"\"two\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that keyvalue() on a string or an array fails with
+ // JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT. Runs under every mode in
+ // ALL_MODES.
+ void TestKeyValueMethodErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {"\"string\"", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)},
+ {"[1, 2, 3, 4]", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks the `starts with` predicate on string literals and inside a
+ // filter. Runs under every mode in ALL_MODES.
+ void TestStartsWithPredicate() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"1", R"("some string" starts with "some")", {"true"}},
+ {"1", R"("some string" starts with "string")", {"false"}},
+ {R"(["some string", "string"])", R"($[*] ? (@ starts with "string"))", {"\"string\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks that `starts with` applied to a multi-element sequence or to a
+ // number fails with JSONPATH_INVALID_STARTS_WITH_ARGUMENT. Runs under
+ // every mode in ALL_MODES.
+ void TestStartsWithPredicateErrors() {
+ const TVector<TRuntimeErrorTestCase> testCases = {
+ {R"(["first", "second"])", R"($[*] starts with "first")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)},
+ {"1", R"(1 starts with "string")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error);
+ }
+ }
+ }
+ // Checks the `exists` predicate: true for a path that is present, null
+ // when the operand expression fails, and usable inside a filter. Runs
+ // under every mode in ALL_MODES.
+ void TestExistsPredicate() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"({
+ "key": 123
+ })", "exists ($.key)", {"true"}},
+ {"\"string\"", "exists ($ * 2)", {"null"}},
+ {R"(["some string", 2])", "$[*] ? (exists (@ * 2))", {"2"}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks the `is unknown` predicate: true when the inner predicate
+ // evaluates to null, false when it is true or false, and usable inside a
+ // filter. Runs under every mode in ALL_MODES.
+ void TestIsUnknownPredicate() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {"1", "(1 < true) is unknown", {"true"}},
+ {"1", "(true == true) is unknown", {"false"}},
+ {"1", "(true == false) is unknown", {"false"}},
+ {R"(["some string", -20])", "$[*] ? ((1 < @) is unknown)", {"\"some string\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
+ // Checks the `like_regex` predicate: matching over a sequence, the
+ // case-insensitive flag "i", and use inside a filter. Runs under every
+ // mode in ALL_MODES.
+ void TestLikeRegexPredicate() {
+ const TVector<TMultiOutputTestCase> testCases = {
+ {R"(["string", "123", "456"])", R"($[*] like_regex "[0-9]+")", {"true"}},
+ {R"(["string", "another string"])", R"($[*] like_regex "[0-9]+")", {"false"}},
+ // Case insensitive flag
+ {R"("AbCd")", R"($ like_regex "abcd")", {"false"}},
+ {R"("AbCd")", R"($ like_regex "abcd" flag "i")", {"true"}},
+ {R"(["string", "123", "456"])", R"($[*] ? (@ like_regex "[0-9]+"))", {"\"123\"", "\"456\""}},
+ };
+ for (const auto& testCase : testCases) {
+ for (const auto mode : ALL_MODES) {
+ RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
+ }
+ }
+ }
diff --git a/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp b/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp
new file mode 100644
index 0000000000..3b964e28b5
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/examples_ut.cpp
@@ -0,0 +1,81 @@
+#include "test_base.h"
+// These examples are taken from the [ISO/IEC TR 19075-6:2017] standard (https://www.iso.org/standard/67367.html)
+class TJsonPathExamplesTest : public TJsonPathTestBase {
+ // Default constructor; only invokes the TJsonPathTestBase default constructor.
+ TJsonPathExamplesTest()
+ : TJsonPathTestBase()
+ {
+ }
+ UNIT_TEST_SUITE(TJsonPathExamplesTest);
+ UNIT_TEST(TestMemberAccessExamples);
+ UNIT_TEST(TestElementAccessExamples);
+ UNIT_TEST(TestFilterExamples);
+ // Member access examples over a "phones" array in which one element has
+ // no "type" key: lax mode returns the types that are present, strict mode
+ // is expected to fail with JSONPATH_MEMBER_NOT_FOUND.
+ void TestMemberAccessExamples() {
+ TString input = R"({
+ "phones": [
+ {"type": "cell", "number": "abc-defg"},
+ {"number": "pqr-wxyz"},
+ {"type": "home", "number": "hij-klmn"}
+ ]
+ })";
+ RunTestCase(input, "lax $.phones.type", {"\"cell\"", "\"home\""});
+ RunRuntimeErrorTestCase(input, "strict $.phones[*].type", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND));
+ // NOTE: Example in standard has different order of elements. This is okay because order of elements after
+ // wildcard member access is implementation-defined
+ RunTestCase(input, "lax $.phones.*", {"\"abc-defg\"", "\"cell\"", "\"pqr-wxyz\"", "\"hij-klmn\"", "\"home\""});
+ }
+ // Element access examples in lax mode: subscripts applied after a
+ // wildcard member access, including `last` and an open-ended
+ // `1 to last` range over arrays of different lengths.
+ void TestElementAccessExamples() {
+ // NOTE: Example in standard has different order of elements. This is okay because order of elements after
+ // wildcard member access is implementation-defined
+ RunTestCase(R"({
+ "sensors": {
+ "SF": [10, 11, 12, 13, 15, 16, 17],
+ "FC": [20, 22, 24],
+ "SJ": [30, 33]
+ }
+ })", "lax $.sensors.*[0, last, 2]", {"20", "24", "24", "10", "17", "12", "30", "33"});
+ RunTestCase(R"({
+ "x": [12, 30],
+ "y": [8],
+ "z": ["a", "b", "c"]
+ })", "lax $.*[1 to last]", {"30", "\"b\"", "\"c\""});
+ }
+ // Filter examples: a bare member used as a predicate is a parse error,
+ // an explicit comparison against true works, and `exists` filters keep or
+ // drop the item depending on whether the member is present.
+ void TestFilterExamples() {
+ RunParseErrorTestCase("$ ? (@.skilled)");
+ TString json = R"({"name":"Portia","skilled":true})";
+ RunTestCase(json, "$ ? (@.skilled == true)", {json});
+ // Standard also mentions this example in lax mode. It is invalid because
+ // in this case automatic unwrapping on arrays before filters will be performed
+ // and query will finish with error
+ RunTestCase(R"({
+ "x": [1, "one"]
+ })", "strict $.x ? (2 > @[*])", {});
+ RunTestCase(R"({
+ "name": {
+ "first": "Manny",
+ "last": "Moe"
+ },
+ "points": 123
+ })", "strict $ ? (exists (@.name)).name", {R"({"first":"Manny","last":"Moe"})"});
+ RunTestCase(R"({
+ "points": 41
+ })", "strict $ ? (exists (@.name)).name", {});
+ }
+UNIT_TEST_SUITE_REGISTRATION(TJsonPathExamplesTest); \ No newline at end of file
diff --git a/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp b/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp
new file mode 100644
index 0000000000..4d5dda83ac
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/lax_ut.cpp
@@ -0,0 +1,283 @@
+#include "test_base.h"
+// JsonPath tests whose cases are all executed with every query prefix listed in LAX_MODES.
+class TJsonPathLaxTest : public TJsonPathTestBase {
+ TJsonPathLaxTest()
+ : TJsonPathTestBase()
+ {
+ }
+ // Registration of the test methods defined below
+ UNIT_TEST_SUITE(TJsonPathLaxTest);
+ UNIT_TEST(TestArrayUnwrap);
+ UNIT_TEST(TestArrayWrap);
+ UNIT_TEST(TestInvalidArrayIndices);
+ UNIT_TEST(TestStructuralErrorsHandling);
+ UNIT_TEST(TestCompareOperations);
+ UNIT_TEST(TestFilter);
+ UNIT_TEST(TestNumericMethods);
+ UNIT_TEST(TestDoubleMethod);
+ UNIT_TEST(TestKeyValueMethod);
+ UNIT_TEST(TestExistsPredicate);
+ UNIT_TEST(TestLikeRegexPredicate);
+ UNIT_TEST(TestStartsWithPredicate);
+ void TestArrayUnwrap() {
+ // Member and wildcard accessors applied to an array are expected to act on its elements
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"([
+ {"key": 1},
+ {"key": 2}
+ ])", "$.key", {"1", "2"}},
+ {R"([
+ {"key": 1},
+ {"key": 2}
+ ])", "$.*", {"1", "2"}},
+ {R"({
+ "first": {"key": 1},
+ "second": []
+ })", "$.*.key", {"1"}},
+ {R"({
+ "first": {"key": 1},
+ "second": []
+ })", "$.*.*", {"1"}},
+ {R"({"another_key": 123})", "$.key", {}},
+ {R"([
+ {"key": [{"nested": 28}]},
+ {"key": [{"nested": 29}]}
+ ])", "$.key.nested", {"28", "29"}},
+ {R"([
+ {"key": [{"nested": 28}]},
+ {"key": [{"nested": 29}]}
+ ])", "$.*.*", {"28", "29"}},
+ };
+ // Every case is evaluated once per lax-mode prefix
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestArrayWrap() {
+ // Subscripting a non-array item is expected to behave as if it were a one-element array
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"([1, 2])", "$[*][0]", {"1", "2"}},
+ {R"([[1], 2, [3]])", "$[*][0]", {"1", "2", "3"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestInvalidArrayIndices() {
+ // Out-of-range or reversed subscripts are expected to yield no items instead of an error,
+ // while valid subscripts in the same list still produce their elements
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"({
+ "idx": -1,
+ "array": [1, 2, 3]
+ })", "$.array[$.idx]", {}},
+ {R"({
+ "from": -1,
+ "to": 3,
+ "array": [1, 2, 3]
+ })", "$.array[$.from to $.to]", {}},
+ {R"({
+ "from": 0,
+ "to": -1,
+ "array": [1, 2, 3]
+ })", "$.array[$.from to $.to]", {}},
+ {R"([1, 2, 3, 4, 5])", "$[3 to 0]", {}},
+ {R"({
+ "idx": -1,
+ "array": [1, 2, 3]
+ })", "$.array[$.idx, 1 to 2]", {"2", "3"}},
+ {R"({
+ "from": -1,
+ "to": 3,
+ "array": [1, 2, 3]
+ })", "$.array[0, $.from to $.to, 2 to 2]", {"1", "3"}},
+ {R"({
+ "from": 0,
+ "to": -1,
+ "array": [1, 2, 3]
+ })", "$.array[0, $.from to $.to, 1 to 1]", {"1", "2"}},
+ {R"([1, 2, 3, 4, 5])", "$[0, 3 to 0, 1]", {"1", "2"}},
+ {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", {"5"}},
+ {"[]", "$[last]", {}},
+ {"[]", "$[last to 0]", {}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestStructuralErrorsHandling() {
+ // Items that do not fit the accessor (doubly nested arrays, objects without the member)
+ // are expected to be skipped silently
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"([[{"key": 1}]])", "$.key", {}},
+ {R"([[{"key": 1}]])", "$.*", {}},
+ {R"([
+ {"key": 1},
+ {"not_key": 2},
+ {"key": 3}
+ ])", "$[*].key", {"1", "3"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestCompareOperations() {
+ // Comparison of two arrays: operands are unwrapped and compared pairwise
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrap
+ {R"({
+ "left": [1, 2, 3],
+ "right": [4, 5, 6]
+ })", "$.left < $.right", {"true"}},
+ // Check incomparable types
+ // NOTE: Even though values of types string and number are incomparable,
+ // pair 1 < 4 is true and was found first, so the overall result is true
+ {R"({
+ "left": [1, 2, "string"],
+ "right": [4, 5, 6]
+ })", "$.left < $.right", {"true"}},
+ // NOTE: In this example pair "string" < 4 results in error and was found first,
+ // so overall result is null
+ {R"({
+ "left": ["string", 2, 3],
+ "right": [4, 5, 6]
+ })", "$.left < $.right", {"null"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestFilter() {
+ // A filter applied to an array is expected to test each element separately
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrap
+ {R"([
+ {"age": 18},
+ {"age": 25},
+ {"age": 50},
+ {"age": 5}
+ ])", "$ ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestNumericMethods() {
+ // abs(), floor() and ceiling() called on an array are expected to map over its elements
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrap
+ {"[-1.23, 4.56, 3, 0]", "$.abs()", {"1.23", "4.56", "3", "0"}},
+ {"[-1.23, 4.56, 3, 0]", "$.floor()", {"-2", "4", "3", "0"}},
+ {"[-1.23, 4.56, 3, 0]", "$.ceiling()", {"-1", "5", "3", "0"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestDoubleMethod() {
+ // double() called on an array of numeric strings is expected to convert every element
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrap
+ {R"([
+ "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1",
+ "123e3", "123e+3", "1.23e+1", "1.23e1",
+ "12e0", "12.3e0", "0", "0.0", "0.0e0"
+ ])", "$.double()", {
+ "123", "123.4", "0.567", "123.4", "0.567", "12.34",
+ "123000", "123000", "12.3", "12.3",
+ "12", "12.3", "0", "0", "0",
+ }},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestKeyValueMethod() {
+ // keyvalue() called on an array holding one object is expected to act on that object
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrap
+ {R"([{
+ "one": 1,
+ "two": 2,
+ "three": 3
+ }])", "$.keyvalue().name", {"\"one\"", "\"three\"", "\"two\""}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestExistsPredicate() {
+ // exists() over a missing member is expected to give false rather than an error
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"({
+ "key": 123
+ })", "exists ($.another_key)", {"false"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestLikeRegexPredicate() {
+ // like_regex over an array: elements are examined in order and the first decisive one wins
+ const TVector<TMultiOutputTestCase> cases = {
+ // Check unwrapping
+ {R"(["string", "123", "456"])", R"($ like_regex "[0-9]+")", {"true"}},
+ // Check early stopping
+ {R"([123, "123", "456"])", R"($ like_regex "[0-9]+")", {"null"}},
+ {R"(["123", "456", 123])", R"($ like_regex "[0-9]+")", {"true"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestStartsWithPredicate() {
+ // "starts with" against several candidate prefixes: a non-string candidate seen before
+ // any match gives null, a match seen first gives true
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}},
+ {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"true"}},
+ {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}},
+ {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : LAX_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+UNIT_TEST_SUITE_REGISTRATION(TJsonPathLaxTest); \ No newline at end of file
diff --git a/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp b/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp
new file mode 100644
index 0000000000..cea2d8577b
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/lib_id_ut.cpp
@@ -0,0 +1,23 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
+#include <util/system/platform.h>
+ * Paranoid test to check correct regexp library is used
+ */
+namespace NYql::NJsonPath {
+extern ui32 GetReLibId();
+ // Compares the id returned by GetReLibId() with a constant from NReWrapper::TSerialization.
+ // NOTE(review): the two assertions below expect different ids; the preprocessor lines that would
+ // place them on separate branches (#else / #endif) are not visible in this view - confirm.
+ Y_UNIT_TEST(DefaultLib) {
+#ifdef __x86_64__
+ UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kHyperscan);
+ UNIT_ASSERT_VALUES_EQUAL(GetReLibId(), (ui32)NReWrapper::TSerialization::kRe2);
+ }
diff --git a/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp b/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp
new file mode 100644
index 0000000000..c8414581e4
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/strict_ut.cpp
@@ -0,0 +1,118 @@
+#include "test_base.h"
+// JsonPath tests whose cases are all executed with every query prefix listed in STRICT_MODES.
+class TJsonPathStrictTest : public TJsonPathTestBase {
+ TJsonPathStrictTest()
+ : TJsonPathTestBase()
+ {
+ }
+ // Registration of the test methods defined below
+ UNIT_TEST_SUITE(TJsonPathStrictTest);
+ UNIT_TEST(TestRuntimeErrors);
+ UNIT_TEST(TestIncomparableTypes);
+ UNIT_TEST(TestLikeRegexPredicate);
+ UNIT_TEST(TestStartsWithPredicate);
+ void TestRuntimeErrors() {
+ // Each query is expected to fail at execution time with the listed issue code
+ const TVector<TRuntimeErrorTestCase> cases = {
+ {R"([
+ {"key": 1},
+ {"key": 2}
+ ])", "$.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)},
+ {R"([
+ {"key": 1},
+ {"key": 2}
+ ])", "$.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)},
+ {R"({
+ "first": {"key": 1},
+ "second": []
+ })", "$.*.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)},
+ {R"({
+ "first": {"key": 1},
+ "second": []
+ })", "$.*.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)},
+ {R"({"another_key": 123})", "$.key", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)},
+ {R"([1, 2])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)},
+ {R"([[1], 2, [3]])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)},
+ {R"({
+ "idx": -1,
+ "array": [1, 2, 3]
+ })", "$.array[$.idx]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {R"({
+ "from": -1,
+ "to": 3,
+ "array": [1, 2, 3]
+ })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {R"({
+ "from": 0,
+ "to": -1,
+ "array": [1, 2, 3]
+ })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {R"({
+ "from": -20,
+ "to": -10,
+ "array": [1, 2, 3]
+ })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {R"([1, 2, 3, 4, 5])", "$[3 to 0]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE)},
+ {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {"[]", "$[last]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ {"[]", "$[last to 0]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)},
+ };
+ // Every case is evaluated once per strict-mode prefix
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : STRICT_MODES) {
+ RunRuntimeErrorTestCase(item.Json, prefix + item.JsonPath, item.Error);
+ }
+ }
+ }
+ void TestIncomparableTypes() {
+ // With a string among the numbers the comparison is expected to give null,
+ // wherever in the array the string is placed
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"({
+ "left": [1, 2, "string"],
+ "right": [4, 5, 6]
+ })", "$.left < $.right", {"null"}},
+ {R"({
+ "left": ["string", 2, 3],
+ "right": [4, 5, 6]
+ })", "$.left < $.right", {"null"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : STRICT_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestLikeRegexPredicate() {
+ // A non-string element anywhere in the input is expected to turn the like_regex result into null
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"(["123", 123])", R"($[*] like_regex "[0-9]+")", {"null"}},
+ {R"([123, "123"])", R"($[*] like_regex "[0-9]+")", {"null"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : STRICT_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+ void TestStartsWithPredicate() {
+ // "starts with" against several candidate prefixes: any non-string candidate gives null,
+ // even when a matching string comes first
+ const TVector<TMultiOutputTestCase> cases = {
+ {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}},
+ {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"null"}},
+ {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}},
+ {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}},
+ };
+ for (const auto& item : cases) {
+ for (const TStringBuf prefix : STRICT_MODES) {
+ RunTestCase(item.Json, prefix + item.JsonPath, item.Result);
+ }
+ }
+ }
+UNIT_TEST_SUITE_REGISTRATION(TJsonPathStrictTest); \ No newline at end of file
diff --git a/yql/essentials/minikql/jsonpath/ut/test_base.cpp b/yql/essentials/minikql/jsonpath/ut/test_base.cpp
new file mode 100644
index 0000000000..feceecddb1
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/test_base.cpp
@@ -0,0 +1,167 @@
+#include "test_base.h"
+#include <yql/essentials/types/binary_json/write.h>
+using namespace NKikimr::NBinaryJson;
+ // Member initializers for the fields declared in TJsonPathTestBase; the line carrying the
+ // constructor signature is not visible in this view.
+ : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry()))
+ , Alloc(__LOCATION__)
+ , Env(Alloc)
+ , MemInfo("Memory")
+ , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get())
+ , ValueBuilder(HolderFactory)
+// Shorthand used in test tables: casts an EIssueCode enumerator to TIssueCode.
+TIssueCode TJsonPathTestBase::C(TIssuesIds::EIssueCode code) {
+ return static_cast<TIssueCode>(code);
+// Parses raw JSON text into a DOM value via TryParseJsonDom, using this fixture's ValueBuilder.
+TUnboxedValue TJsonPathTestBase::ParseJson(TStringBuf raw) {
+ return TryParseJsonDom(raw, &ValueBuilder);
+// Evaluates rawJsonPath against rawJson and checks that the produced nodes, serialized back to
+// JSON, equal expectedResult element by element. The query is executed twice: over the parsed DOM
+// value and over the binary JSON form of the same document.
+// Any exception thrown inside is reported through UNIT_FAIL together with the inputs.
+void TJsonPathTestBase::RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult) {
+ try {
+ const auto unboxedValueJson = TValue(ParseJson(rawJson));
+ const auto binaryJson = *SerializeToBinaryJson(rawJson);
+ auto reader = TBinaryJsonReader::Make(binaryJson);
+ auto binaryJsonRoot = TValue(reader->GetRootCursor());
+ TIssues issues;
+ const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS);
+ UNIT_ASSERT_C(issues.Empty(), "Parse errors found");
+ // Both representations must give the same result
+ for (const auto& json : {unboxedValueJson, binaryJsonRoot}) {
+ const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder);
+ UNIT_ASSERT_C(!result.IsError(), "Runtime errors found");
+ const auto& nodes = result.GetNodes();
+ UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size());
+ for (size_t i = 0; i < nodes.size(); i++) {
+ const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder);
+ UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]);
+ }
+ }
+ } catch (...) {
+ // Attach the inputs to whatever went wrong so the failing case can be identified
+ TStringBuilder message;
+ message << "Exception: " << CurrentExceptionMessage() << Endl
+ << "Input JSON: " << rawJson << Endl
+ << "Jsonpath: " << rawJsonPath << Endl
+ << "Expected output:";
+ for (const auto& item : expectedResult) {
+ message << " " << item;
+ }
+ message << Endl;
+ UNIT_FAIL(message);
+ }
+// Parses rawJsonPath and asserts that at least one issue was reported.
+// The third argument of ParseJsonPath (2 here) is presumably the parse-error limit - the sibling
+// helpers pass MAX_PARSE_ERRORS in that position.
+void TJsonPathTestBase::RunParseErrorTestCase(const TString& rawJsonPath) {
+ try {
+ TIssues issues;
+ const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, 2);
+ UNIT_ASSERT_C(!issues.Empty(), "Expected parse errors");
+ } catch (...) {
+ // NOTE(review): the line that opens this failure-report expression is not visible in this view - confirm
+ "Exception: " << CurrentExceptionMessage() << Endl
+ << "Jsonpath: " << rawJsonPath << Endl
+ );
+ }
+// Evaluates rawJsonPath against rawJson and asserts that execution ends with an error whose code
+// equals `error`. The query must parse cleanly; it is executed over both the parsed DOM value and
+// the binary JSON form of the document.
+void TJsonPathTestBase::RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error) {
+ try {
+ const auto unboxedValueJson = TValue(ParseJson(rawJson));
+ const auto binaryJson = *SerializeToBinaryJson(rawJson);
+ auto reader = TBinaryJsonReader::Make(binaryJson);
+ auto binaryJsonRoot = TValue(reader->GetRootCursor());
+ TIssues issues;
+ const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS);
+ UNIT_ASSERT_C(issues.Empty(), "Parse errors found");
+ // Both representations must fail with the same code
+ for (const auto& json : {unboxedValueJson, binaryJsonRoot}) {
+ const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder);
+ UNIT_ASSERT_C(result.IsError(), "Expected runtime error");
+ UNIT_ASSERT_VALUES_EQUAL(result.GetError().GetCode(), error);
+ }
+ } catch (...) {
+ // NOTE(review): the line that opens this failure-report expression is not visible in this view - confirm
+ TStringBuilder()
+ << "Exception: " << CurrentExceptionMessage() << Endl
+ << "Input JSON: " << rawJson << Endl
+ << "Jsonpath: " << rawJsonPath << Endl
+ << "Expected error: " << error << Endl
+ );
+ }
+// Evaluates rawJsonPath against rawJson with the given named variables (each value is raw JSON
+// text) and checks that the produced nodes, serialized back to JSON, equal expectedResult element
+// by element. Runs once with the document and variables as parsed DOM values and once with both
+// in binary JSON form.
+// Any exception thrown inside is reported through UNIT_FAIL together with the inputs.
+void TJsonPathTestBase::RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult) {
+ try {
+ const auto unboxedValueJson = TValue(ParseJson(rawJson));
+ const auto binaryJson = *SerializeToBinaryJson(rawJson);
+ auto reader = TBinaryJsonReader::Make(binaryJson);
+ auto binaryJsonRoot = TValue(reader->GetRootCursor());
+ TVariablesMap unboxedValueVariables;
+ for (const auto& [name, rawValue] : variables) {
+ unboxedValueVariables[name] = TValue(ParseJson(rawValue));
+ }
+ TVariablesMap binaryJsonVariables;
+ // Serialized documents and their readers are kept for the whole run and reserved up front;
+ // presumably the cursors stored in the map refer into them - verify before restructuring
+ TVector<TBinaryJson> storage;
+ TVector<TBinaryJsonReaderPtr> readers;
+ storage.reserve(variables.size());
+ readers.reserve(variables.size());
+ for (const auto& [name, rawValue] : variables) {
+ storage.push_back(*SerializeToBinaryJson(rawValue));
+ readers.push_back(TBinaryJsonReader::Make(storage.back()));
+ binaryJsonVariables[name] = TValue(readers.back()->GetRootCursor());
+ }
+ TIssues issues;
+ const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS);
+ UNIT_ASSERT_C(issues.Empty(), "Parse errors found");
+ TVector<std::pair<TValue, TVariablesMap>> testCases = {
+ {unboxedValueJson, unboxedValueVariables},
+ {binaryJsonRoot, binaryJsonVariables},
+ };
+ for (const auto& testCase : testCases) {
+ const auto result = ExecuteJsonPath(jsonPath, testCase.first, testCase.second, &ValueBuilder);
+ UNIT_ASSERT_C(!result.IsError(), "Runtime errors found");
+ const auto& nodes = result.GetNodes();
+ UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size());
+ for (size_t i = 0; i < nodes.size(); i++) {
+ const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder);
+ UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]);
+ }
+ }
+ } catch (...) {
+ TStringBuilder message;
+ message << "Exception: " << CurrentExceptionMessage() << Endl
+ << "Input JSON: " << rawJson << Endl
+ << "Variables:" << Endl;
+ // One indented line per variable so that several variables stay readable
+ for (const auto& [name, rawValue] : variables) {
+ message << "\t" << name << " = " << rawValue << Endl;
+ }
+ message << "Jsonpath: " << rawJsonPath << Endl
+ << "Expected output:";
+ for (const auto& item : expectedResult) {
+ message << " " << item;
+ }
+ message << Endl;
+ UNIT_FAIL(message);
+ }
diff --git a/yql/essentials/minikql/jsonpath/ut/test_base.h b/yql/essentials/minikql/jsonpath/ut/test_base.h
new file mode 100644
index 0000000000..59e654f290
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/test_base.h
@@ -0,0 +1,75 @@
+#pragma once
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/minikql/jsonpath/jsonpath.h>
+#include <yql/essentials/minikql/dom/json.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_value_builder.h>
+#include <contrib/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <contrib/ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h>
+#include <contrib/ydb/library/yql/minikql/mkql_mem_info.h>
+#include <contrib/ydb/library/yql/minikql/mkql_function_registry.h>
+#include <contrib/ydb/library/yql/minikql/mkql_alloc.h>
+#include <contrib/ydb/library/yql/minikql/mkql_node.h>
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/generic/yexception.h>
+using namespace NYql;
+using namespace NYql::NDom;
+using namespace NYql::NUdf;
+using namespace NYql::NJsonPath;
+using namespace NJson;
+using namespace NKikimr::NMiniKQL;
+// Common fixture for the JsonPath unit tests: owns the runtime objects needed to build values
+// and provides helpers that run a query and compare its outcome with an expectation.
+class TJsonPathTestBase: public TTestBase {
+ TJsonPathTestBase();
+ // Query prefixes prepended to a path by the test suites; the empty prefix is grouped with "lax "
+ const TVector<TStringBuf> LAX_MODES = {"", "lax "};
+ const TVector<TStringBuf> STRICT_MODES = {"strict "};
+ const TVector<TStringBuf> ALL_MODES = {"", "lax ", "strict "};
+ // Runtime objects; the constructor wires them together in this declaration order
+ TIntrusivePtr<IFunctionRegistry> FunctionRegistry;
+ TScopedAlloc Alloc;
+ TTypeEnvironment Env;
+ TMemoryUsageInfo MemInfo;
+ THolderFactory HolderFactory;
+ TDefaultValueBuilder ValueBuilder;
+ // Limit passed to ParseJsonPath by the Run*TestCase helpers
+ const int MAX_PARSE_ERRORS = 100;
+ // Casts an issue enumerator to TIssueCode
+ TIssueCode C(TIssuesIds::EIssueCode code);
+ // Parses raw JSON text into a DOM value
+ TUnboxedValue ParseJson(TStringBuf raw);
+ // Input document, query and the expected serialized result nodes
+ struct TMultiOutputTestCase {
+ TString Json;
+ TString JsonPath;
+ TVector<TString> Result;
+ };
+ void RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult);
+ void RunParseErrorTestCase(const TString& rawJsonPath);
+ // Input document, query and the issue code execution is expected to fail with
+ struct TRuntimeErrorTestCase {
+ TString Json;
+ TString JsonPath;
+ TIssueCode Error;
+ };
+ void RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error);
+ // Like TMultiOutputTestCase, plus named variables given as raw JSON text
+ struct TVariablesTestCase {
+ TString Json;
+ THashMap<TStringBuf, TStringBuf> Variables;
+ TString JsonPath;
+ TVector<TString> Result;
+ };
+ void RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult);
diff --git a/yql/essentials/minikql/jsonpath/ut/ya.make b/yql/essentials/minikql/jsonpath/ut/ya.make
new file mode 100644
index 0000000000..0da935241b
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ut/ya.make
@@ -0,0 +1,28 @@
+ common_ut.cpp
+ examples_ut.cpp
+ lax_ut.cpp
+ strict_ut.cpp
+ test_base.cpp
+ lib_id_ut.cpp
+ library/cpp/json
+ yql/essentials/types/binary_json
+ contrib/ydb/library/yql/minikql
+ contrib/ydb/library/yql/minikql/computation/llvm14
+ yql/essentials/minikql/dom
+ contrib/ydb/library/yql/minikql/invoke_builtins/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/core/issue/protos
+ contrib/ydb/library/yql/sql/pg_dummy
diff --git a/yql/essentials/minikql/jsonpath/value.cpp b/yql/essentials/minikql/jsonpath/value.cpp
new file mode 100644
index 0000000000..356543baf8
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/value.cpp
@@ -0,0 +1,383 @@
+#include "value.h"
+#include <yql/essentials/minikql/dom/node.h>
+namespace NYql::NJsonPath {
+using namespace NUdf;
+using namespace NDom;
+using namespace NKikimr;
+using namespace NKikimr::NBinaryJson;
+ // TArrayIterator constructors: each stores its argument in Iterator (copied or moved); the
+ // first initializer, whose signature line is not visible in this view, stores the empty marker.
+ : Iterator(TEmptyMarker())
+TArrayIterator::TArrayIterator(const TUnboxedValue& iterator)
+ : Iterator(iterator)
+TArrayIterator::TArrayIterator(TUnboxedValue&& iterator)
+ : Iterator(std::move(iterator))
+TArrayIterator::TArrayIterator(const NBinaryJson::TArrayIterator& iterator)
+ : Iterator(iterator)
+TArrayIterator::TArrayIterator(NBinaryJson::TArrayIterator&& iterator)
+ : Iterator(std::move(iterator))
+// Advances the iterator. On success stores the next element in `value` and returns true;
+// returns false without touching `value` when the iterator is empty or exhausted.
+bool TArrayIterator::Next(TValue& value) {
+ if (std::holds_alternative<TEmptyMarker>(Iterator)) {
+ return false;
+ } else if (auto* iterator = std::get_if<NBinaryJson::TArrayIterator>(&Iterator)) {
+ if (!iterator->HasNext()) {
+ return false;
+ }
+ value = TValue(iterator->Next());
+ return true;
+ } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) {
+ TUnboxedValue result;
+ const bool success = iterator->Next(result);
+ if (success) {
+ // `result` is a scratch local, so hand it over instead of copying it
+ value = TValue(std::move(result));
+ }
+ return success;
+ } else {
+ Y_ABORT("Unexpected variant case in Next");
+ }
+ // TObjectIterator constructors: each stores its argument in Iterator (copied or moved); the
+ // first initializer, whose signature line is not visible in this view, stores the empty marker.
+ : Iterator(TEmptyMarker())
+TObjectIterator::TObjectIterator(const TUnboxedValue& iterator)
+ : Iterator(iterator)
+TObjectIterator::TObjectIterator(TUnboxedValue&& iterator)
+ : Iterator(std::move(iterator))
+TObjectIterator::TObjectIterator(const NBinaryJson::TObjectIterator& iterator)
+ : Iterator(iterator)
+TObjectIterator::TObjectIterator(NBinaryJson::TObjectIterator&& iterator)
+ : Iterator(std::move(iterator))
+// Advances the iterator. On success stores the next entry in `key` / `value` and returns true;
+// returns false without touching either when the iterator is empty or exhausted.
+bool TObjectIterator::Next(TValue& key, TValue& value) {
+ if (std::holds_alternative<TEmptyMarker>(Iterator)) {
+ return false;
+ } else if (auto* iterator = std::get_if<NBinaryJson::TObjectIterator>(&Iterator)) {
+ if (!iterator->HasNext()) {
+ return false;
+ }
+ const auto [itKey, itValue] = iterator->Next();
+ key = TValue(itKey);
+ value = TValue(itValue);
+ return true;
+ } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) {
+ TUnboxedValue itKey;
+ TUnboxedValue itValue;
+ const bool success = iterator->NextPair(itKey, itValue);
+ if (success) {
+ // Both are scratch locals, so hand them over instead of copying them
+ key = TValue(std::move(itKey));
+ value = TValue(std::move(itValue));
+ }
+ return success;
+ } else {
+ Y_ABORT("Unexpected variant case in Next");
+ }
+ // TValue constructors: each stores its argument in Value (copied or moved); the first
+ // initializer, whose signature line is not visible in this view, stores MakeEntity().
+ // The cursor-based constructors additionally call UnpackInnerValue().
+ : Value(MakeEntity())
+TValue::TValue(const TUnboxedValue& value)
+ : Value(value)
+TValue::TValue(TUnboxedValue&& value)
+ : Value(std::move(value))
+TValue::TValue(const TEntryCursor& value)
+ : Value(value)
+ UnpackInnerValue();
+TValue::TValue(TEntryCursor&& value)
+ : Value(std::move(value))
+ UnpackInnerValue();
+TValue::TValue(const TContainerCursor& value)
+ : Value(value)
+ UnpackInnerValue();
+TValue::TValue(TContainerCursor&& value)
+ : Value(std::move(value))
+ UnpackInnerValue();
+// Reports the JSON kind of the held value, whichever of the three representations stores it.
+EValueType TValue::GetType() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ // DOM node: all numeric node types collapse to Number, Dict and Attr to Object
+ switch (GetNodeType(*node)) {
+ case ENodeType::Entity:
+ return EValueType::Null;
+ case ENodeType::Bool:
+ return EValueType::Bool;
+ case ENodeType::String:
+ return EValueType::String;
+ case ENodeType::Int64:
+ case ENodeType::Uint64:
+ case ENodeType::Double:
+ return EValueType::Number;
+ case ENodeType::List:
+ return EValueType::Array;
+ case ENodeType::Attr:
+ case ENodeType::Dict:
+ return EValueType::Object;
+ }
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ switch (container->GetType()) {
+ case EContainerType::Object:
+ return EValueType::Object;
+ case EContainerType::Array:
+ return EValueType::Array;
+ case EContainerType::TopLevelScalar:
+ Y_ABORT("Logical error: TContainerCursor with TopLevelScalar type must be converted to TEntryCursor");
+ }
+ } else if (const auto* entry = std::get_if<TEntryCursor>(&Value)) {
+ switch (entry->GetType()) {
+ case EEntryType::Null:
+ return EValueType::Null;
+ case EEntryType::BoolTrue:
+ case EEntryType::BoolFalse:
+ return EValueType::Bool;
+ case EEntryType::String:
+ return EValueType::String;
+ case EEntryType::Number:
+ return EValueType::Number;
+ case EEntryType::Container:
+ Y_ABORT("Logical error: TEntryCursor with Container type must be converted to TContainerCursor");
+ }
+ } else {
+ Y_ABORT("Unexpected variant case in GetType");
+ }
+// Type predicates: Is() compares GetType() with the requested kind, the named variants
+// below forward to it with a fixed kind.
+bool TValue::Is(EValueType type) const {
+ return GetType() == type;
+bool TValue::IsBool() const {
+ return Is(EValueType::Bool);
+bool TValue::IsNumber() const {
+ return Is(EValueType::Number);
+bool TValue::IsString() const {
+ return Is(EValueType::String);
+bool TValue::IsNull() const {
+ return Is(EValueType::Null);
+bool TValue::IsObject() const {
+ return Is(EValueType::Object);
+bool TValue::IsArray() const {
+ return Is(EValueType::Array);
+// Returns the numeric payload as double; integer DOM nodes are converted.
+// Aborts when a container cursor is held.
+double TValue::GetNumber() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ if (IsNodeType(*node, ENodeType::Double)) {
+ return node->Get<double>();
+ }
+ if (IsNodeType(*node, ENodeType::Int64)) {
+ return static_cast<double>(node->Get<i64>());
+ }
+ // Anything else is read as an unsigned 64-bit integer
+ return static_cast<double>(node->Get<ui64>());
+ } else if (const auto* entry = std::get_if<TEntryCursor>(&Value)) {
+ return entry->GetNumber();
+ } else {
+ Y_ABORT("Unexpected variant case in GetNumber");
+ }
+// Returns the boolean payload. Aborts when a container cursor is held.
+bool TValue::GetBool() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ return node->Get<bool>();
+ } else if (const auto* entry = std::get_if<TEntryCursor>(&Value)) {
+ // Binary JSON encodes the boolean in the entry type itself
+ return entry->GetType() == EEntryType::BoolTrue;
+ } else {
+ Y_ABORT("Unexpected variant case in GetBool");
+ }
+// Returns a view of the string payload. Aborts when a container cursor is held.
+const TStringBuf TValue::GetString() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ return node->AsStringRef();
+ } else if (const auto* entry = std::get_if<TEntryCursor>(&Value)) {
+ return entry->GetString();
+ } else {
+ Y_ABORT("Unexpected variant case in GetString");
+ }
+// Returns the number of elements of an array or object value.
+// Aborts when an entry cursor is held.
+ui32 TValue::GetSize() const {
+ Y_DEBUG_ABORT_UNLESS(IsArray() || IsObject());
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ // An embedded DOM value is reported as having no elements
+ if (node->IsEmbedded()) {
+ return 0;
+ }
+ return IsNodeType(*node, ENodeType::List) ? node->GetListLength() : node->GetDictLength();
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ return container->GetSize();
+ } else {
+ Y_ABORT("Unexpected variant case in GetSize");
+ }
+// Returns the array element at `index`. Aborts when an entry cursor is held.
+TValue TValue::GetElement(ui32 index) const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ // DOM lists are addressed through Lookup with the index as key
+ return TValue(node->Lookup(TUnboxedValuePod(index)));
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ return TValue(container->GetElement(index));
+ } else {
+ Y_ABORT("Unexpected variant case in GetElement");
+ }
+// Returns an iterator over the elements of an array value.
+// Aborts when an entry cursor is held.
+TArrayIterator TValue::GetArrayIterator() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ // An embedded DOM value gets the empty iterator
+ if (node->IsEmbedded()) {
+ return TArrayIterator();
+ }
+ return TArrayIterator(node->GetListIterator());
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ return TArrayIterator(container->GetArrayIterator());
+ } else {
+ Y_ABORT("Unexpected variant case in GetArrayIterator");
+ }
+// Looks up member `key` of an object value; returns Nothing() when it is absent.
+// Aborts when an entry cursor is held.
+TMaybe<TValue> TValue::Lookup(const TStringBuf key) const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ if (node->IsEmbedded()) {
+ return Nothing();
+ }
+ // Lookup on TUnboxedValue can be performed only with TUnboxedValue key.
+ // To avoid allocating new string we use our custom Lookup method defined
+ // on underlying TMapNode that accepts TStringRef
+ const auto* mapNode = static_cast<const TMapNode*>(node->AsBoxed().Get());
+ const auto found = mapNode->Lookup(key);
+ if (found) {
+ return {TValue(found)};
+ }
+ return Nothing();
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ const auto found = container->Lookup(key);
+ if (found.Defined()) {
+ return TValue(*found);
+ }
+ return Nothing();
+ } else {
+ Y_ABORT("Unexpected variant case in Lookup");
+ }
+// Returns an iterator over the key/value pairs of an object value.
+// Aborts when an entry cursor is held.
+TObjectIterator TValue::GetObjectIterator() const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ // An embedded DOM value gets the empty iterator
+ if (node->IsEmbedded()) {
+ return TObjectIterator();
+ }
+ return TObjectIterator(node->GetDictIterator());
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ return TObjectIterator(container->GetObjectIterator());
+ } else {
+ Y_ABORT("Unexpected variant case in GetObjectIterator");
+ }
+// Returns the value as a DOM TUnboxedValue: a held DOM value is returned as is, binary JSON
+// cursors are read into a DOM value with the help of valueBuilder.
+TUnboxedValue TValue::ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const {
+ if (const auto* node = std::get_if<TUnboxedValue>(&Value)) {
+ return *node;
+ } else if (const auto* container = std::get_if<TContainerCursor>(&Value)) {
+ return ReadContainerToJsonDom(*container, valueBuilder);
+ } else if (const auto* entry = std::get_if<TEntryCursor>(&Value)) {
+ return ReadElementToJsonDom(*entry, valueBuilder);
+ } else {
+ Y_ABORT("Unexpected variant case in ConvertToUnboxedValue");
+ }
+// Normalizes a cursor-based value so that GetType() never sees an entry cursor of Container
+// type or a container cursor of TopLevelScalar type. The two steps run in this order, so a
+// container produced by the first step is also examined by the second.
+void TValue::UnpackInnerValue() {
+ // If TEntryCursor points to container, we need to extract TContainerCursor
+ if (const auto* value = std::get_if<TEntryCursor>(&Value)) {
+ if (value->GetType() == EEntryType::Container) {
+ Value = value->GetContainer();
+ }
+ }
+ // If TContainerCursor points to top level scalar, we need to extract TEntryCursor
+ if (const auto* value = std::get_if<TContainerCursor>(&Value)) {
+ if (value->GetType() == EContainerType::TopLevelScalar) {
+ // The scalar is read as element 0 of the container
+ Value = value->GetElement(0);
+ }
+ }
diff --git a/yql/essentials/minikql/jsonpath/value.h b/yql/essentials/minikql/jsonpath/value.h
new file mode 100644
index 0000000000..ca663ad5c4
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/value.h
@@ -0,0 +1,101 @@
+#pragma once
+#include <yql/essentials/types/binary_json/read.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <util/generic/maybe.h>
+#include <variant>
+namespace NYql::NJsonPath {
+// JSON kinds reported by TValue::GetType().
+// NOTE(review): the numbering skips 3 - confirm whether that value is reserved on purpose.
+enum class EValueType {
+ Bool = 0,
+ Number = 1,
+ String = 2,
+ Null = 4,
+ Object = 5,
+ Array = 6,
+struct TEmptyMarker {
+class TValue;
+// Iterator over array elements that hides whether the array is a DOM value or binary JSON.
+class TArrayIterator {
+ TArrayIterator();
+ explicit TArrayIterator(const NUdf::TUnboxedValue& iterator);
+ explicit TArrayIterator(NUdf::TUnboxedValue&& iterator);
+ explicit TArrayIterator(const NKikimr::NBinaryJson::TArrayIterator& iterator);
+ explicit TArrayIterator(NKikimr::NBinaryJson::TArrayIterator&& iterator);
+ // Stores the next element in `value` and returns true, or returns false when there is none
+ bool Next(TValue& value);
+ // Underlying iterator; TEmptyMarker stands for an iterator that yields nothing
+ std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TArrayIterator> Iterator;
+// Iterator over object entries that hides whether the object is a DOM value or binary JSON.
+class TObjectIterator {
+ TObjectIterator();
+ explicit TObjectIterator(const NUdf::TUnboxedValue& iterator);
+ explicit TObjectIterator(NUdf::TUnboxedValue&& iterator);
+ explicit TObjectIterator(const NKikimr::NBinaryJson::TObjectIterator& iterator);
+ explicit TObjectIterator(NKikimr::NBinaryJson::TObjectIterator&& iterator);
+ // Stores the next entry in `key` / `value` and returns true, or returns false when there is none
+ bool Next(TValue& key, TValue& value);
+ // Underlying iterator; TEmptyMarker stands for an iterator that yields nothing
+ std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TObjectIterator> Iterator;
+// A JSON value backed either by a DOM TUnboxedValue or by a binary JSON cursor, exposing one
+// read-only interface over both.
+class TValue {
+ TValue();
+ explicit TValue(const NUdf::TUnboxedValue& value);
+ explicit TValue(NUdf::TUnboxedValue&& value);
+ explicit TValue(const NKikimr::NBinaryJson::TEntryCursor& value);
+ explicit TValue(NKikimr::NBinaryJson::TEntryCursor&& value);
+ explicit TValue(const NKikimr::NBinaryJson::TContainerCursor& value);
+ explicit TValue(NKikimr::NBinaryJson::TContainerCursor&& value);
+ // Type inspection
+ EValueType GetType() const;
+ bool Is(EValueType type) const;
+ bool IsBool() const;
+ bool IsNumber() const;
+ bool IsString() const;
+ bool IsNull() const;
+ bool IsObject() const;
+ bool IsArray() const;
+ // Scalar value methods
+ double GetNumber() const;
+ bool GetBool() const;
+ const TStringBuf GetString() const;
+ // Element count; meant for arrays and objects only
+ ui32 GetSize() const;
+ // Array methods
+ TValue GetElement(ui32 index) const;
+ TArrayIterator GetArrayIterator() const;
+ // Object methods
+ TMaybe<TValue> Lookup(const TStringBuf key) const;
+ TObjectIterator GetObjectIterator() const;
+ // Returns the value as a DOM TUnboxedValue, converting from binary JSON when needed
+ NUdf::TUnboxedValue ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const;
+ // Replaces an entry cursor that points to a container, or a container cursor that holds a
+ // top-level scalar, with the cursor kind matching the actual value
+ void UnpackInnerValue();
+ std::variant<NUdf::TUnboxedValue, NKikimr::NBinaryJson::TEntryCursor, NKikimr::NBinaryJson::TContainerCursor> Value;
diff --git a/yql/essentials/minikql/jsonpath/ya.make b/yql/essentials/minikql/jsonpath/ya.make
new file mode 100644
index 0000000000..3ae29b36cf
--- /dev/null
+++ b/yql/essentials/minikql/jsonpath/ya.make
@@ -0,0 +1,63 @@
+ 2
+ 27
+ 0
+IF (ARCH_X86_64)
+ )
+ yql/essentials/minikql/jsonpath/rewrapper/hyperscan
+ )
+ )
+ contrib/libs/double-conversion
+ library/cpp/json
+ yql/essentials/minikql/jsonpath/rewrapper/re2
+ yql/essentials/minikql/jsonpath/rewrapper
+ yql/essentials/types/binary_json
+ yql/essentials/minikql/dom
+ yql/essentials/public/issue
+ yql/essentials/public/udf
+ yql/essentials/ast
+ yql/essentials/utils
+ yql/essentials/core/issue/protos
+ yql/essentials/parser/proto_ast/antlr3
+ yql/essentials/parser/proto_ast/gen/jsonpath
+ ast_builder.cpp
+ ast_nodes.cpp
+ binary.cpp
+ executor.cpp
+ jsonpath.cpp
+ parse_double.cpp
+ type_check.cpp
+ value.cpp
+ benchmark
+ rewrapper
+ ut
diff --git a/yql/essentials/minikql/ya.make b/yql/essentials/minikql/ya.make
new file mode 100644
index 0000000000..e45eb4a543
--- /dev/null
+++ b/yql/essentials/minikql/ya.make
@@ -0,0 +1,6 @@
+ dom
+ jsonpath