summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/json2/sql_query.h
diff options
context:
space:
mode:
authorimunkin <[email protected]>2024-11-08 10:00:23 +0300
committerimunkin <[email protected]>2024-11-08 10:12:13 +0300
commita784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch)
tree05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs/common/json2/sql_query.h
parentd70137a7b530ccaa52834274913bbb5a3d1ca06e (diff)
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories: * clickhouse/client * datetime * knn * roaring commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs/common/json2/sql_query.h')
-rw-r--r--yql/essentials/udfs/common/json2/sql_query.h184
1 files changed, 184 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/json2/sql_query.h b/yql/essentials/udfs/common/json2/sql_query.h
new file mode 100644
index 00000000000..cb3bafd3b0b
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/sql_query.h
@@ -0,0 +1,184 @@
+#pragma once
+
+#include "resource.h"
+#include "compile_path.h"
+
+#include <yql/essentials/core/sql_types/yql_atom_enums.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/node.h>
+
+#include <util/generic/yexception.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+ using namespace NJsonPath;
+
+ template <EDataSlot InputType, EJsonQueryWrap Mode>
+ class TSqlQuery: public TBoxedValue {
+ public:
+ explicit TSqlQuery(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ static TStringRef Name();
+
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (name != Name()) {
+ return false;
+ }
+
+ auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME);
+ auto optionalJsonType = builder.Optional()->Item(jsonType).Build();
+ TType* inputType = nullptr;
+ if constexpr (InputType == EDataSlot::JsonDocument) {
+ inputType = builder.SimpleType<TJsonDocument>();
+ } else {
+ inputType = jsonType;
+ }
+ auto inputOptionalType = builder.Optional()->Item(inputType).Build();
+ auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME);
+ auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build();
+
+ /*
+ Arguments:
+ 0. Resource<JsonNode>? or JsonDocument?. Input json
+ 1. Resource<JsonPath>. Jsonpath to execute on json
+ 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath
+ 3. Bool. True - throw on empty result, false otherwise
+ 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true
+ 5. Bool. True - throw on error, false - otherwise
+ 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true
+ */
+ // we can't mark TSqlQuery as strict due to runtime throw policy setting
+ // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans
+ builder.Args()
+ ->Add(inputOptionalType)
+ .Add(jsonPathType)
+ .Add(dictType)
+ .Add<bool>()
+ .Add(optionalJsonType)
+ .Add<bool>()
+ .Add(optionalJsonType)
+ .Done()
+ .Returns(optionalJsonType);
+
+ if (!typesOnly) {
+ builder.Implementation(new TSqlQuery(builder.GetSourcePosition()));
+ }
+ return true;
+ }
+
+ private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final {
+ Y_UNUSED(valueBuilder);
+ try {
+ if (!args[0].HasValue()) {
+ return TUnboxedValuePod();
+ }
+
+ TValue jsonDom;
+ if constexpr (InputType == EDataSlot::JsonDocument) {
+ jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor());
+ } else {
+ jsonDom = TValue(args[0]);
+ }
+
+ auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get());
+ const auto& jsonPath = *jsonPathResource->Get();
+
+ const bool throwOnEmpty = args[3].Get<bool>();
+ const auto emptyDefault = args[4];
+ const bool throwOnError = args[5].Get<bool>();
+ const auto errorDefault = args[6];
+ const auto variables = DictToVariables(args[2]);
+
+ auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder);
+
+ const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) {
+ if (throws) {
+ ythrow yexception() << message;
+ }
+ return caseDefault;
+ };
+
+ if (result.IsError()) {
+ return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault);
+ }
+
+ auto& nodes = result.GetNodes();
+ const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object));
+ if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) {
+ TVector<TUnboxedValue> converted;
+ converted.reserve(nodes.size());
+ for (auto& node : nodes) {
+ converted.push_back(node.ConvertToUnboxedValue(valueBuilder));
+ }
+ return MakeList(converted.data(), converted.size(), valueBuilder);
+ }
+
+ if (nodes.empty()) {
+ return handleCase("Empty result", throwOnEmpty, emptyDefault);
+ }
+
+ // No wrapping is applicable and result is not empty. Result must be a single object or array
+ if (nodes.size() > 1) {
+ return handleCase("Result consists of multiple items", throwOnError, errorDefault);
+ }
+
+ if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) {
+ return handleCase("Result is neither object nor array", throwOnError, errorDefault);
+ }
+
+ return nodes[0].ConvertToUnboxedValue(valueBuilder);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ TSourcePosition Pos_;
+ };
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() {
+ return "SqlQuery";
+ }
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() {
+ return "SqlQueryWrap";
+ }
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() {
+ return "SqlQueryConditionalWrap";
+ }
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() {
+ return "JsonDocumentSqlQuery";
+ }
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() {
+ return "JsonDocumentSqlQueryWrap";
+ }
+
+ template <>
+ TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() {
+ return "JsonDocumentSqlQueryConditionalWrap";
+ }
+}
+