diff options
author | aidarsamer <aidarsamer@ydb.tech> | 2023-05-12 17:28:33 +0300 |
---|---|---|
committer | aidarsamer <aidarsamer@ydb.tech> | 2023-05-12 17:28:33 +0300 |
commit | 9629a1cbefcb838e4801ad8a22944c8e81c8f0c9 (patch) | |
tree | cdaea314bae5d64f904bd2834748ba5ac8b238c9 | |
parent | 0ab536455399f2da26b3e92917c0eda5436f0ae8 (diff) | |
download | ydb-9629a1cbefcb838e4801ad8a22944c8e81c8f0c9.tar.gz |
KIKIMR-15068: Move JSON_EXISTS and JSON_VALUE expand to UDFs to peephole optimizers
24 files changed, 783 insertions, 719 deletions
diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt index ac4ece79a0a..0d26cb8fc26 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt @@ -26,6 +26,7 @@ target_link_libraries(kqp-ut-common PUBLIC cpp-client-ydb_topic ) target_sources(kqp-ut-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt index 84aa8d90e3b..dd0675d5bdf 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt @@ -27,6 +27,7 @@ target_link_libraries(kqp-ut-common PUBLIC cpp-client-ydb_topic ) target_sources(kqp-ut-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt index 84aa8d90e3b..dd0675d5bdf 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt @@ -27,6 +27,7 @@ target_link_libraries(kqp-ut-common PUBLIC cpp-client-ydb_topic ) target_sources(kqp-ut-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt index ac4ece79a0a..0d26cb8fc26 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt @@ -26,6 
+26,7 @@ target_link_libraries(kqp-ut-common PUBLIC cpp-client-ydb_topic ) target_sources(kqp-ut-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/services/ydb/ut/json_udf.cpp b/ydb/core/kqp/ut/common/json2_udf.cpp index 704eb5c9e47..2d6c08ab968 100644 --- a/ydb/services/ydb/ut/json_udf.cpp +++ b/ydb/core/kqp/ut/common/json2_udf.cpp @@ -1,5 +1,9 @@ #include <ydb/library/yql/udfs/common/json2/json2_udf.cpp> +namespace NKikimr::NKqp { + NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module() { return new NJson2Udf::TJson2Module(); } + +} // namespace NKikimr::NKqp
\ No newline at end of file diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index 164122bac9a..788fae83fd8 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -50,12 +50,14 @@ SIMPLE_UDF(TRandString, char*(ui32)) { } SIMPLE_MODULE(TTestUdfsModule, TTestFilter, TTestFilterTerminate, TRandString); +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module(); NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module(); NMiniKQL::IFunctionRegistry* UdfFrFactory(const NScheme::TTypeRegistry& typeRegistry) { Y_UNUSED(typeRegistry); auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone(); funcRegistry->AddModule("", "TestUdfs", new TTestUdfsModule()); + funcRegistry->AddModule("", "Json2", CreateJson2Module()); funcRegistry->AddModule("", "Re2", CreateRe2Module()); NKikimr::NMiniKQL::FillStaticModules(*funcRegistry); return funcRegistry.Release(); diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 3bd25b69ec2..258dd45e120 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -4438,6 +4438,44 @@ Y_UNIT_TEST_SUITE(KqpOlap) { TestTableWithNulls({ testCase }); } + Y_UNIT_TEST(Json_GetValue) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE + level = 1; + )") + .SetExpectedReply(R"([[1;["val1"];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_Exists) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE + level = 1; + )") + .SetExpectedReply(R"([[1;[%true];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_Query) { + TAggregationTestCase testCase; + 
testCase.SetQuery(R"( + SELECT id, JSON_QUERY(jsonval, "$.col1" WITH UNCONDITIONAL WRAPPER), + JSON_QUERY(jsondoc, "$.col1" WITH UNCONDITIONAL WRAPPER) + FROM `/Root/tableWithNulls` + WHERE + level = 1; + )") + .SetExpectedReply(R"([[1;["[\"val1\"]"];#]])"); + + TestTableWithNulls({ testCase }); + } + Y_UNIT_TEST(Olap_InsertFails) { auto settings = TKikimrSettings() .SetWithSampleTables(false) diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index d3edd61a252..ae234a1859c 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -357,7 +357,9 @@ std::shared_ptr<arrow::Schema> TTableWithNullsHelper::GetArrowSchema() { arrow::field("id", arrow::int32()), arrow::field("resource_id", arrow::utf8()), arrow::field("level", arrow::int32()), - arrow::field("binary_str", arrow::binary()) + arrow::field("binary_str", arrow::binary()), + arrow::field("jsonval", arrow::utf8()), + arrow::field("jsondoc", arrow::binary()) }); } @@ -369,36 +371,46 @@ std::shared_ptr<arrow::RecordBatch> TTableWithNullsHelper::TestArrowBatch(ui64, rowCount = 10; std::shared_ptr<arrow::Schema> schema = GetArrowSchema(); - arrow::Int32Builder b1; - arrow::StringBuilder b2; - arrow::Int32Builder b3; - arrow::StringBuilder b4; + arrow::Int32Builder bId; + arrow::StringBuilder bResourceId; + arrow::Int32Builder bLevel; + arrow::StringBuilder bBinaryStr; + arrow::StringBuilder bJsonVal; + arrow::StringBuilder bJsonDoc; for (size_t i = 1; i <= rowCount / 2; ++i) { - Y_VERIFY(b1.Append(i).ok()); - Y_VERIFY(b2.AppendNull().ok()); - Y_VERIFY(b3.Append(i).ok()); - Y_VERIFY(b4.AppendNull().ok()); + Y_VERIFY(bId.Append(i).ok()); + Y_VERIFY(bResourceId.AppendNull().ok()); + Y_VERIFY(bLevel.Append(i).ok()); + Y_VERIFY(bBinaryStr.AppendNull().ok()); + Y_VERIFY(bJsonVal.Append(std::string(R"({"col1": "val1", "obj": {"obj_col2": "val2"}})")).ok()); + Y_VERIFY(bJsonDoc.AppendNull().ok()); } for (size_t i = rowCount / 2 + 1; i <= rowCount; ++i) { - 
Y_VERIFY(b1.Append(i).ok()); - Y_VERIFY(b2.Append(std::to_string(i)).ok()); - Y_VERIFY(b3.AppendNull().ok()); - Y_VERIFY(b4.Append(std::to_string(i)).ok()); + Y_VERIFY(bId.Append(i).ok()); + Y_VERIFY(bResourceId.Append(std::to_string(i)).ok()); + Y_VERIFY(bLevel.AppendNull().ok()); + Y_VERIFY(bBinaryStr.Append(std::to_string(i)).ok()); + Y_VERIFY(bJsonVal.AppendNull().ok()); + Y_VERIFY(bJsonDoc.Append(std::string(R"({"col1": "val1", "obj": {"obj_col2": "val2"}})")).ok()); } - std::shared_ptr<arrow::Int32Array> a1; - std::shared_ptr<arrow::StringArray> a2; - std::shared_ptr<arrow::Int32Array> a3; - std::shared_ptr<arrow::StringArray> a4; - - Y_VERIFY(b1.Finish(&a1).ok()); - Y_VERIFY(b2.Finish(&a2).ok()); - Y_VERIFY(b3.Finish(&a3).ok()); - Y_VERIFY(b4.Finish(&a4).ok()); - - return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4 }); + std::shared_ptr<arrow::Int32Array> aId; + std::shared_ptr<arrow::StringArray> aResourceId; + std::shared_ptr<arrow::Int32Array> aLevel; + std::shared_ptr<arrow::StringArray> aBinaryStr; + std::shared_ptr<arrow::StringArray> aJsonVal; + std::shared_ptr<arrow::StringArray> aJsonDoc; + + Y_VERIFY(bId.Finish(&aId).ok()); + Y_VERIFY(bResourceId.Finish(&aResourceId).ok()); + Y_VERIFY(bLevel.Finish(&aLevel).ok()); + Y_VERIFY(bBinaryStr.Finish(&aBinaryStr).ok()); + Y_VERIFY(bJsonVal.Finish(&aJsonVal).ok()); + Y_VERIFY(bJsonDoc.Finish(&aJsonDoc).ok()); + + return arrow::RecordBatch::Make(schema, rowCount, { aId, aResourceId, aLevel, aBinaryStr, aJsonVal, aJsonDoc }); } } diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index cc76cb33976..f2022674160 100644 --- a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -183,6 +183,8 @@ public: Columns { Name: "resource_id" Type: "Utf8" } Columns { Name: "level" Type: "Int32" } Columns { Name: "binary_str" Type: "String" } + Columns { Name: "jsonval" Type: "Json" } + Columns { Name: "jsondoc" Type: "JsonDocument" } KeyColumnNames: "id" )"; diff --git 
a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 189d86adfa5..524a38a3f31 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -3200,113 +3200,6 @@ TExprNode::TPtr FoldJsonSeralizeAfterParse(const TExprNode::TPtr& node) { return FoldSeralizeAfterParse(node, "Json2.Parse", "Json2.Serialize"); } -TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) { - auto jsonPos = jsonExpr->Pos(); - - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TDataExprType>(EDataSlot::Json), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto parse = Build<TCoUdf>(ctx, jsonPos) - .MethodName() - .Build("Json2.Parse") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, jsonPos) - .Callable(parse) - .FreeArgs() - .Add(jsonExpr) - .Build() - .Done().Ptr(); -} - -TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { - const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn(); - if (type->GetKind() == ETypeAnnotationKind::Optional) { - type = type->Cast<TOptionalExprType>()->GetItemType(); - } - argumentDataSlot = type->Cast<TDataExprType>()->GetSlot(); - - // If jsonExpr has JsonDocument type, there is no need to parse it - if (argumentDataSlot == EDataSlot::JsonDocument) { - return jsonExpr; - } - - // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse - return BuildJsonParse(jsonExpr, ctx); -} - -TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx, 
EDataSlot& argumentDataSlot) { - return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot); -} - -TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) { - auto resourcePos = resourceExpr->Pos(); - - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto parse = Build<TCoUdf>(ctx, resourcePos) - .MethodName() - .Build("Json2.Serialize") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, resourcePos) - .Callable(parse) - .FreeArgs() - .Add(resourceExpr) - .Build() - .Done().Ptr(); -} - -TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { - auto jsonPathPos = jsonExpr.JsonPath().Pos(); - - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TDataExprType>(EDataSlot::Utf8) - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto compilePath = Build<TCoUdf>(ctx, jsonPathPos) - .MethodName() - .Build("Json2.CompilePath") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, jsonPathPos) - .Callable(compilePath) - .FreeArgs() - .Add(jsonExpr.JsonPath()) - .Build() - .Done().Ptr(); -} - template<bool Ordered> TExprNode::TPtr CanonizeMultiMap(const TExprNode::TPtr& node, TExprContext& 
ctx) { if constexpr (Ordered) { @@ -5576,560 +5469,6 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return node; }; - map["JsonValue"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { - /* - Here we rewrite expression - JSON_VALUE( - <json>, <jsonPath> - [PASSING <variableExpr1> AS <variableName1>, ...] - [RETURNING <resultType>] - [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY] - [(NULL | DEFAULT <onErrorExpr>) ON ERROR] - ) - Generated depends on the <resultType> specified in RETURNING section: - 1. No RETURNING section - Default returning type of JsonValue is Utf8 and it must convert - result of JsonPath expression into Utf8 string. - Json2::SqlValueConvertToUtf8 is used - 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.) - Json2::SqlValueNumber is used with additional CAST to corresponding type - 3. <resultType> is a date type (Date, Datetime, Timestamp) - Json2::SqlValueInt64 is used with additional CAST to corresponding type - 4. <resultType> is Bool - Json2::SqlValueBool is used - 5. <resultType> is String - Json2::SqlValueUtf8 is used with additional CAST to String - 6. <resultType> is Utf8 - Json2::SqlValueUtf8 is used - Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>: - 1. If variant holds first type, either error happened or the result is empty. - If first tuple element is 0, result is empty. - If first tuple element is 1, error happened. - Second tuple element contains message that can be displayed to the user. - 2. If variant hold second type, execution was successful and it is a result. - We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type. - Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So: - 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>) - 2. Otherwise we check the result of SafeCast callable. 
If it is NULL, cast has failed and it is an error. - If it holds some value, we return it to the user. - If no CAST is needed, we just return the result of Json2::SqlValue*. - What is more, <onEmptyExpr> and <onErrorExpr> must be casted to <resultType> and this CAST can fail too. - ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to - target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception. - - I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea - of a way to handle all this ***, please write to laplab@. - */ - TCoJsonValue jsonValue(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx); - - // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>) - TExprNode::TPtr sqlValueExpr; - const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>(); - const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot(); - bool needCast = false; - const auto jsonValuePos = jsonValue.Pos(); - { - TString sqlValueUdfName; - if (IsDataTypeNumeric(unwrappedSlot)) { - sqlValueUdfName = "SqlValueNumber"; - needCast = true; - } else if (IsDataTypeDate(unwrappedSlot)) { - sqlValueUdfName = "SqlValueInt64"; - needCast = true; - } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) { - if (jsonValue.ReturningType()) { - sqlValueUdfName = "SqlValueUtf8"; - } else { - sqlValueUdfName = "SqlValueConvertToUtf8"; - } - needCast = unwrappedSlot == EDataSlot::String; - } else if (unwrappedSlot == EDataSlot::Bool) { - sqlValueUdfName = "SqlValueBool"; - } else { - YQL_ENSURE(false, "Unsupported type"); - } - - const TTypeAnnotationNode* 
inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlValueUdfName = "JsonDocument" + sqlValueUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - sqlValueUdfName = "Json2." + sqlValueUdfName; - - TTypeAnnotationNode::TListType arguments = { - inputType, - ctx.MakeType<TResourceExprType>("JsonPath") - }; - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos) - .MethodName() - .Build(sqlValueUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos) - .Callable(sqlValue) - .FreeArgs() - .Add(jsonExpr) - .Add(compilePathExpr) - .Add(jsonValue.Variables()) - .Build() - .Done().Ptr(); - } - - auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) { - /* - if Exists($source) - then - return IfPresent( - CAST($source as <resultType>), - ($x) -> { return Just($x); }, - $onCastFail - ) - else - return Nothing(<resultType>) - */ - TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx); - return Build<TCoIf>(ctx, pos) - .Predicate<TCoExists>() - .Optional(source) - .Build() - .ThenValue<TCoIfPresent>() - .Optional<TCoSafeCast>() - .Value(source) - .Type(returnTypeNode) - .Build() - .PresentHandler<TCoLambda>() - .Args({"unwrappedValue"}) - .Body<TCoJust>() - .Input("unwrappedValue") - .Build() - .Build() - .MissingValue(onCastFail) - .Build() - .ElseValue<TCoNothing>() - .OptionalType(returnTypeNode) - .Build() - .Done().Ptr(); - }; - - 
auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) { - return Build<TCoEnsure>(ctx, pos) - .Value<TCoNothing>() - .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) - .Build() - .Predicate<TCoBool>() - .Literal() - .Build("false") - .Build() - .Message(message) - .Done().Ptr(); - }; - - auto makeHandler = [&](EJsonValueHandlerMode mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr { - const auto pos = node->Pos(); - if (mode == EJsonValueHandlerMode::Error) { - return makeThrow(pos, errorMessage); - } - - // Convert NULL to Nothing(<resultType>) - if (IsNull(*node)) { - return Build<TCoNothing>(ctx, pos) - .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) - .Done().Ptr(); - } - - // If type is not Optional, wrap expression in Just call - TExprNode::TPtr result = node; - const auto typeAnn = node->GetTypeAnn(); - if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) { - result = Build<TCoJust>(ctx, pos) - .Input(result) - .Done().Ptr(); - } - - // Perform CAST to <resultType> or return onCastFail - return makeCastOrValue(pos, result, onCastFail); - }; - - const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content()); - const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content()); - auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) { - const auto onError = jsonValue.OnError(); - const auto throwCastError = makeThrow( - onError.Pos(), - Build<TCoString>(ctx, onError.Pos()) - .Literal() - .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn)) - .Done().Ptr() - ); - - return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError); - }; - auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) { - const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos()) - 
.Literal() - .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn)) - .Done().Ptr(); - return makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError)); - }; - - /* - Lambda for handling first type of variant - - ($errorTuple) -> { - if $errorTuple[0] == 0 - then - return onEmptyHandler - else - return onErrorHandler - } - */ - auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple"); - auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos) - .Tuple(errorTupleArgument) - .Index() - .Build("1") - .Done().Ptr(); - const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args(TExprNode::TListType{errorTupleArgument}) - .Body<TCoIf>() - .Predicate<TCoCmpEqual>() - .Left<TCoNth>() - .Tuple(errorTupleArgument) - .Index() - .Build("0") - .Build() - .Right<TCoUint8>() - .Literal() - .Build("0") - .Build() - .Build() - .ThenValue(makeOnEmptyHandler(sqlValueMessage)) - .ElseValue(makeOnErrorHandler(sqlValueMessage)) - .Build() - .Done().Ptr(); - - // Lambda for handling second type of variant - TExprNode::TPtr sqlValueResultLambda; - if (needCast) { - const auto errorMessage = Build<TCoString>(ctx, jsonValuePos) - .Literal() - .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn)) - .Done().Ptr(); - const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult"); - sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args(TExprNode::TListType{inputArgument}) - .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage))) - .Done().Ptr(); - } else { - /* - ($sqlValueResult) -> { - return $sqlValueResult; - } - */ - sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args({"sqlValueResult"}) - .Body("sqlValueResult") - .Done().Ptr(); - } - - // Visit call to get the result - const auto visitResult = Build<TCoVisit>(ctx, 
jsonValuePos) - .Input(sqlValueExpr) - .FreeArgs() - .Add<TCoAtom>() - .Build("0") - .Add(errorLambda) - .Add<TCoAtom>() - .Build("1") - .Add(sqlValueResultLambda) - .Build() - .Done().Ptr(); - - return visitResult; - }; - - map["JsonExists"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { - /* - Here we rewrite expression - JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR) - into - Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>) - and its sibling - JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR) - into - Json2::SqlTryExists(Json2::Parse(<json expr>), <dict with variables>, Json2::CompilePath(<jsonpath>)) - */ - TCoJsonExists jsonExists(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx); - - // Json2::SqlExists(<json>, <compiled jsonpath>, [<default value>]) - // or - // Json2::SqlTryExists(<json>, <compiled jsonpath>) - const bool needThrow = !jsonExists.OnError().IsValid(); - - TString sqlExistsUdfName = "SqlExists"; - if (needThrow) { - sqlExistsUdfName = "SqlTryExists"; - } - - const TTypeAnnotationNode* inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - sqlExistsUdfName = "Json2." 
+ sqlExistsUdfName; - - TTypeAnnotationNode::TListType arguments = { - inputType, - ctx.MakeType<TResourceExprType>("JsonPath") - }; - - if (!needThrow) { - const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool); - const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType); - arguments.push_back(optionalBoolType); - } - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - const auto jsonExistsPos = jsonExists.Pos(); - auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos) - .MethodName() - .Build(sqlExistsUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - if (needThrow) { - return Build<TCoApply>(ctx, jsonExistsPos) - .Callable(sqlExists) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonExists.Variables()) - .Build() - .Done().Ptr(); - } - - return Build<TCoApply>(ctx, jsonExistsPos) - .Callable(sqlExists) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonExists.Variables()) - .Add(jsonExists.OnError().Cast()) - .Build() - .Done().Ptr(); - }; - - map["JsonQuery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { - /* - Here we rewrite expression - JSON_QUERY( - <json expr>, - <jsonpath> - [PASSING <variableExpr1> AS <variableName1>, ...] 
- [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER] - [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY] - [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR] - ) - into something like - Json2::SqlQuery...( - Json2::Parse(<json expr>), - Json2::CompilePath(<jsonpath>), - <dict with variables>, - <do we have ERROR ON EMPTY?>, - <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>, - <do we have ERROR ON ERROR?>, - <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR> - ) - Exact UDF name is choosen depending on wrap config: - - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery - - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap - - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap - */ - TCoJsonQuery jsonQuery(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx); - - // Json2::SqlQuery...(<json expr>, <jsonpath>, ...) 
- const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content()); - TString sqlQueryUdfName = "SqlQuery"; - switch (wrapMode) { - case EJsonQueryWrap::NoWrap: - sqlQueryUdfName = "SqlQuery"; - break; - case EJsonQueryWrap::Wrap: - sqlQueryUdfName = "SqlQueryWrap"; - break; - case EJsonQueryWrap::ConditionalWrap: - sqlQueryUdfName = "SqlQueryConditionalWrap"; - break; - } - - const TTypeAnnotationNode* inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - inputType = ctx.MakeType<TOptionalExprType>(inputType); - sqlQueryUdfName = "Json2." + sqlQueryUdfName; - - const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - TTypeAnnotationNode::TListType arguments{ - inputType, - ctx.MakeType<TResourceExprType>("JsonPath"), - ctx.MakeType<TDataExprType>(EDataSlot::Bool), - optionalJsonResourceType, - ctx.MakeType<TDataExprType>(EDataSlot::Bool), - optionalJsonResourceType, - }; - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) { - return Build<TCoBool>(ctx, pos) - .Literal() - .Build(handler == EJsonQueryHandler::Error ? 
"true" : "false") - .Done().Ptr(); - }; - - auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) { - switch (handler) { - case EJsonQueryHandler::Error: - case EJsonQueryHandler::Null: { - // Nothing(Resource<JsonNode>) - return Build<TCoNothing>(ctx, pos) - .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx)) - .Done().Ptr(); - } - case EJsonQueryHandler::EmptyArray: { - auto value = Build<TCoJson>(ctx, pos) - .Literal() - .Build("[]") - .Done().Ptr(); - return BuildJsonParse(value, ctx); - } - case EJsonQueryHandler::EmptyObject: { - auto value = Build<TCoJson>(ctx, pos) - .Literal() - .Build("{}") - .Done().Ptr(); - return BuildJsonParse(value, ctx); - } - } - }; - - const auto jsonQueryPos = jsonQuery.Pos(); - auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos) - .MethodName() - .Build(sqlQueryUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content()); - const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content()); - const auto onEmptyPos = jsonQuery.OnEmpty().Pos(); - const auto onErrorPos = jsonQuery.OnError().Pos(); - - auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos) - .Callable(sqlQuery) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonQuery.Variables()) - .Add(buildShouldThrow(onEmpty, onEmptyPos)) - .Add(buildHandler(onEmpty, onEmptyPos)) - .Add(buildShouldThrow(onError, onErrorPos)) - .Add(buildHandler(onError, onErrorPos)) - .Build() - .Done().Ptr(); - - // In this case we need to serialize Resource<JsonNode> to Json type - if (!optCtx.Types->JsonQueryReturnsJsonDocument) { - return BuildJsonSerialize(sqlQueryApply, ctx); - } - - // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument - { - auto resourcePos = sqlQueryApply->Pos(); - - auto argumentsType = 
ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - TStringBuf serializeUdfName = "Json2.Serialize"; - if (optCtx.Types->JsonQueryReturnsJsonDocument) { - serializeUdfName = "Json2.SerializeToJsonDocument"; - } - auto parse = Build<TCoUdf>(ctx, resourcePos) - .MethodName() - .Build(serializeUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, resourcePos) - .Callable(parse) - .FreeArgs() - .Add(sqlQueryApply) - .Build() - .Done().Ptr(); - } - }; - map["JsonVariables"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { /* Here we rewrite expression @@ -6803,8 +6142,4 @@ TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap, return {}; } -TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { - return BuildJsonParse(jsonExpr.Json().Ptr(), ctx); -} - } // namespace NYql diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt index fa54b0a08b3..294ed16d9b6 100644 --- a/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt @@ -19,5 +19,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC yql-core-type_ann ) target_sources(yql-core-peephole_opt PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp ) diff --git 
a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt index 1bbb34cbc46..36a6bbe0897 100644 --- a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt @@ -20,5 +20,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC yql-core-type_ann ) target_sources(yql-core-peephole_opt PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp ) diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt index 1bbb34cbc46..36a6bbe0897 100644 --- a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt @@ -20,5 +20,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC yql-core-type_ann ) target_sources(yql-core-peephole_opt PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp ) diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt index fa54b0a08b3..294ed16d9b6 100644 --- a/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt @@ -19,5 +19,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC yql-core-type_ann ) target_sources(yql-core-peephole_opt PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp ) diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp 
b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp new file mode 100644 index 00000000000..b778624b9da --- /dev/null +++ b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp @@ -0,0 +1,676 @@ +#include "yql_opt_json_peephole_physical.h" + +#include <ydb/library/yql/core/yql_atom_enums.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +namespace NYql { + +namespace { + +using namespace NNodes; + +TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) { + auto jsonPos = jsonExpr->Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TDataExprType>(EDataSlot::Json), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto parse = Build<TCoUdf>(ctx, jsonPos) + .MethodName() + .Build("Json2.Parse") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, jsonPos) + .Callable(parse) + .FreeArgs() + .Add(jsonExpr) + .Build() + .Done().Ptr(); +} + +TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { + const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn(); + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + argumentDataSlot = type->Cast<TDataExprType>()->GetSlot(); + + // If jsonExpr has JsonDocument type, there is no need to parse it + if (argumentDataSlot == EDataSlot::JsonDocument) { + return jsonExpr; + } + + // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse + return BuildJsonParse(jsonExpr, ctx); +} + +TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, 
TExprContext& ctx, EDataSlot& argumentDataSlot) { + return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot); +} + +TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { + auto jsonPathPos = jsonExpr.JsonPath().Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TDataExprType>(EDataSlot::Utf8) + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto compilePath = Build<TCoUdf>(ctx, jsonPathPos) + .MethodName() + .Build("Json2.CompilePath") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, jsonPathPos) + .Callable(compilePath) + .FreeArgs() + .Add(jsonExpr.JsonPath()) + .Build() + .Done().Ptr(); +} + +TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) { + auto resourcePos = resourceExpr->Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto parse = Build<TCoUdf>(ctx, resourcePos) + .MethodName() + .Build("Json2.Serialize") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, resourcePos) + .Callable(parse) + .FreeArgs() + .Add(resourceExpr) + .Build() + .Done().Ptr(); +} + +} // namespace + + +TExprNode::TPtr ExpandJsonValue(const TExprNode::TPtr& 
node, TExprContext& ctx) { + /* + Here we rewrite expression + JSON_VALUE( + <json>, <jsonPath> + [PASSING <variableExpr1> AS <variableName1>, ...] + [RETURNING <resultType>] + [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY] + [(NULL | DEFAULT <onErrorExpr>) ON ERROR] + ) + Generated depends on the <resultType> specified in RETURNING section: + 1. No RETURNING section + Default returning type of JsonValue is Utf8 and it must convert + result of JsonPath expression into Utf8 string. + Json2::SqlValueConvertToUtf8 is used + 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.) + Json2::SqlValueNumber is used with additional CAST to corresponding type + 3. <resultType> is a date type (Date, Datetime, Timestamp) + Json2::SqlValueInt64 is used with additional CAST to corresponding type + 4. <resultType> is Bool + Json2::SqlValueBool is used + 5. <resultType> is String + Json2::SqlValueUtf8 is used with additional CAST to String + 6. <resultType> is Utf8 + Json2::SqlValueUtf8 is used + Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>: + 1. If variant holds first type, either error happened or the result is empty. + If first tuple element is 0, result is empty. + If first tuple element is 1, error happened. + Second tuple element contains message that can be displayed to the user. + 2. If variant hold second type, execution was successful and it is a result. + We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type. + Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So: + 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>) + 2. Otherwise we check the result of SafeCast callable. If it is NULL, cast has failed and it is an error. + If it holds some value, we return it to the user. + If no CAST is needed, we just return the result of Json2::SqlValue*. 
+ What is more, <onEmptyExpr> and <onErrorExpr> must be casted to <resultType> and this CAST can fail too. + ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to + target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception. + + I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea + of a way to handle all this ***, please write to laplab@. + */ + TCoJsonValue jsonValue(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx); + + // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>) + TExprNode::TPtr sqlValueExpr; + const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>(); + const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot(); + bool needCast = false; + const auto jsonValuePos = jsonValue.Pos(); + { + TString sqlValueUdfName; + if (IsDataTypeNumeric(unwrappedSlot)) { + sqlValueUdfName = "SqlValueNumber"; + needCast = true; + } else if (IsDataTypeDate(unwrappedSlot)) { + sqlValueUdfName = "SqlValueInt64"; + needCast = true; + } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) { + if (jsonValue.ReturningType()) { + sqlValueUdfName = "SqlValueUtf8"; + } else { + sqlValueUdfName = "SqlValueConvertToUtf8"; + } + needCast = unwrappedSlot == EDataSlot::String; + } else if (unwrappedSlot == EDataSlot::Bool) { + sqlValueUdfName = "SqlValueBool"; + } else { + YQL_ENSURE(false, "Unsupported type"); + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + 
sqlValueUdfName = "JsonDocument" + sqlValueUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + sqlValueUdfName = "Json2." + sqlValueUdfName; + + TTypeAnnotationNode::TListType arguments = { + inputType, + ctx.MakeType<TResourceExprType>("JsonPath") + }; + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos) + .MethodName() + .Build(sqlValueUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos) + .Callable(sqlValue) + .FreeArgs() + .Add(jsonExpr) + .Add(compilePathExpr) + .Add(jsonValue.Variables()) + .Build() + .Done().Ptr(); + } + + auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) { + /* + if Exists($source) + then + return IfPresent( + CAST($source as <resultType>), + ($x) -> { return Just($x); }, + $onCastFail + ) + else + return Nothing(<resultType>) + */ + TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx); + return Build<TCoIf>(ctx, pos) + .Predicate<TCoExists>() + .Optional(source) + .Build() + .ThenValue<TCoIfPresent>() + .Optional<TCoSafeCast>() + .Value(source) + .Type(returnTypeNode) + .Build() + .PresentHandler<TCoLambda>() + .Args({"unwrappedValue"}) + .Body<TCoJust>() + .Input("unwrappedValue") + .Build() + .Build() + .MissingValue(onCastFail) + .Build() + .ElseValue<TCoNothing>() + .OptionalType(returnTypeNode) + .Build() + .Done().Ptr(); + }; + + auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) { + return Build<TCoEnsure>(ctx, pos) + .Value<TCoNothing>() + .OptionalType(ExpandType(pos, 
*returnTypeAnn, ctx)) + .Build() + .Predicate<TCoBool>() + .Literal() + .Build("false") + .Build() + .Message(message) + .Done().Ptr(); + }; + + auto makeHandler = [&](EJsonValueHandlerMode mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr { + const auto pos = node->Pos(); + if (mode == EJsonValueHandlerMode::Error) { + return makeThrow(pos, errorMessage); + } + + // Convert NULL to Nothing(<resultType>) + if (IsNull(*node)) { + return Build<TCoNothing>(ctx, pos) + .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) + .Done().Ptr(); + } + + // If type is not Optional, wrap expression in Just call + TExprNode::TPtr result = node; + const auto typeAnn = node->GetTypeAnn(); + if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) { + result = Build<TCoJust>(ctx, pos) + .Input(result) + .Done().Ptr(); + } + + // Perform CAST to <resultType> or return onCastFail + return makeCastOrValue(pos, result, onCastFail); + }; + + const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content()); + const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content()); + auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) { + const auto onError = jsonValue.OnError(); + const auto throwCastError = makeThrow( + onError.Pos(), + Build<TCoString>(ctx, onError.Pos()) + .Literal() + .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn)) + .Done().Ptr() + ); + + return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError); + }; + auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) { + const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos()) + .Literal() + .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn)) + .Done().Ptr(); + return 
makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError)); + }; + + /* + Lambda for handling first type of variant + + ($errorTuple) -> { + if $errorTuple[0] == 0 + then + return onEmptyHandler + else + return onErrorHandler + } + */ + auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple"); + auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos) + .Tuple(errorTupleArgument) + .Index() + .Build("1") + .Done().Ptr(); + const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args(TExprNode::TListType{errorTupleArgument}) + .Body<TCoIf>() + .Predicate<TCoCmpEqual>() + .Left<TCoNth>() + .Tuple(errorTupleArgument) + .Index() + .Build("0") + .Build() + .Right<TCoUint8>() + .Literal() + .Build("0") + .Build() + .Build() + .ThenValue(makeOnEmptyHandler(sqlValueMessage)) + .ElseValue(makeOnErrorHandler(sqlValueMessage)) + .Build() + .Done().Ptr(); + + // Lambda for handling second type of variant + TExprNode::TPtr sqlValueResultLambda; + if (needCast) { + const auto errorMessage = Build<TCoString>(ctx, jsonValuePos) + .Literal() + .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn)) + .Done().Ptr(); + const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult"); + sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args(TExprNode::TListType{inputArgument}) + .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage))) + .Done().Ptr(); + } else { + /* + ($sqlValueResult) -> { + return $sqlValueResult; + } + */ + sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args({"sqlValueResult"}) + .Body("sqlValueResult") + .Done().Ptr(); + } + + // Visit call to get the result + const auto visitResult = Build<TCoVisit>(ctx, jsonValuePos) + .Input(sqlValueExpr) + .FreeArgs() + .Add<TCoAtom>() + .Build("0") + .Add(errorLambda) + .Add<TCoAtom>() + .Build("1") + .Add(sqlValueResultLambda) + 
.Build() + .Done().Ptr(); + + return visitResult; +} + +TExprNode::TPtr ExpandJsonExists(const TExprNode::TPtr& node, TExprContext& ctx) { + /* + Here we rewrite expression + JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR) + into + Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>) + and its sibling + JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR) + into + Json2::SqlTryExists(Json2::Parse(<json expr>), <dict with variables>, Json2::CompilePath(<jsonpath>)) + */ + TCoJsonExists jsonExists(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx); + + // Json2::SqlExists(<json>, <compiled jsonpath>, [<default value>]) + // or + // Json2::SqlTryExists(<json>, <compiled jsonpath>) + const bool needThrow = !jsonExists.OnError().IsValid(); + + TString sqlExistsUdfName = "SqlExists"; + if (needThrow) { + sqlExistsUdfName = "SqlTryExists"; + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + sqlExistsUdfName = "Json2." 
+ sqlExistsUdfName; + + TTypeAnnotationNode::TListType arguments = { + inputType, + ctx.MakeType<TResourceExprType>("JsonPath") + }; + + if (!needThrow) { + const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool); + const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType); + arguments.push_back(optionalBoolType); + } + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + const auto jsonExistsPos = jsonExists.Pos(); + auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos) + .MethodName() + .Build(sqlExistsUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + if (needThrow) { + return Build<TCoApply>(ctx, jsonExistsPos) + .Callable(sqlExists) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonExists.Variables()) + .Build() + .Done().Ptr(); + } + + return Build<TCoApply>(ctx, jsonExistsPos) + .Callable(sqlExists) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonExists.Variables()) + .Add(jsonExists.OnError().Cast()) + .Build() + .Done().Ptr(); +} + +TExprNode::TPtr ExpandJsonQuery(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) { + /* + Here we rewrite expression + JSON_QUERY( + <json expr>, + <jsonpath> + [PASSING <variableExpr1> AS <variableName1>, ...] 
+ [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER] + [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY] + [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR] + ) + into something like + Json2::SqlQuery...( + Json2::Parse(<json expr>), + Json2::CompilePath(<jsonpath>), + <dict with variables>, + <do we have ERROR ON EMPTY?>, + <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>, + <do we have ERROR ON ERROR?>, + <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR> + ) + Exact UDF name is choosen depending on wrap config: + - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery + - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap + - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap + */ + TCoJsonQuery jsonQuery(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx); + + // Json2::SqlQuery...(<json expr>, <jsonpath>, ...) 
+ const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content()); + TString sqlQueryUdfName = "SqlQuery"; + switch (wrapMode) { + case EJsonQueryWrap::NoWrap: + sqlQueryUdfName = "SqlQuery"; + break; + case EJsonQueryWrap::Wrap: + sqlQueryUdfName = "SqlQueryWrap"; + break; + case EJsonQueryWrap::ConditionalWrap: + sqlQueryUdfName = "SqlQueryConditionalWrap"; + break; + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + inputType = ctx.MakeType<TOptionalExprType>(inputType); + sqlQueryUdfName = "Json2." + sqlQueryUdfName; + + const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + TTypeAnnotationNode::TListType arguments{ + inputType, + ctx.MakeType<TResourceExprType>("JsonPath"), + ctx.MakeType<TDataExprType>(EDataSlot::Bool), + optionalJsonResourceType, + ctx.MakeType<TDataExprType>(EDataSlot::Bool), + optionalJsonResourceType, + }; + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) { + return Build<TCoBool>(ctx, pos) + .Literal() + .Build(handler == EJsonQueryHandler::Error ? 
"true" : "false") + .Done().Ptr(); + }; + + auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) { + switch (handler) { + case EJsonQueryHandler::Error: + case EJsonQueryHandler::Null: { + // Nothing(Resource<JsonNode>) + return Build<TCoNothing>(ctx, pos) + .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx)) + .Done().Ptr(); + } + case EJsonQueryHandler::EmptyArray: { + auto value = Build<TCoJson>(ctx, pos) + .Literal() + .Build("[]") + .Done().Ptr(); + return BuildJsonParse(value, ctx); + } + case EJsonQueryHandler::EmptyObject: { + auto value = Build<TCoJson>(ctx, pos) + .Literal() + .Build("{}") + .Done().Ptr(); + return BuildJsonParse(value, ctx); + } + } + }; + + const auto jsonQueryPos = jsonQuery.Pos(); + auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos) + .MethodName() + .Build(sqlQueryUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content()); + const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content()); + const auto onEmptyPos = jsonQuery.OnEmpty().Pos(); + const auto onErrorPos = jsonQuery.OnError().Pos(); + + auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos) + .Callable(sqlQuery) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonQuery.Variables()) + .Add(buildShouldThrow(onEmpty, onEmptyPos)) + .Add(buildHandler(onEmpty, onEmptyPos)) + .Add(buildShouldThrow(onError, onErrorPos)) + .Add(buildHandler(onError, onErrorPos)) + .Build() + .Done().Ptr(); + + // In this case we need to serialize Resource<JsonNode> to Json type + if (!typesCtx.JsonQueryReturnsJsonDocument) { + return BuildJsonSerialize(sqlQueryApply, ctx); + } + + // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument + { + auto resourcePos = sqlQueryApply->Pos(); + + auto argumentsType = 
ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + TStringBuf serializeUdfName = "Json2.Serialize"; + if (typesCtx.JsonQueryReturnsJsonDocument) { + serializeUdfName = "Json2.SerializeToJsonDocument"; + } + auto parse = Build<TCoUdf>(ctx, resourcePos) + .MethodName() + .Build(serializeUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, resourcePos) + .Callable(parse) + .FreeArgs() + .Add(sqlQueryApply) + .Build() + .Done().Ptr(); + } +}; + +} // namespace NYql
\ No newline at end of file diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h new file mode 100644 index 00000000000..82af2f2ecc6 --- /dev/null +++ b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h @@ -0,0 +1,11 @@ +#pragma once + +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h> +#include <ydb/library/yql/core/yql_type_annotation.h> + +namespace NYql { + TExprNode::TPtr ExpandJsonValue(const TExprNode::TPtr& node, TExprContext& ctx); + TExprNode::TPtr ExpandJsonExists(const TExprNode::TPtr& node, TExprContext& ctx); + TExprNode::TPtr ExpandJsonQuery(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx); +} // namespace NYql
\ No newline at end of file diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index c35056e47ca..d16c1d46ef6 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -1,5 +1,7 @@ #include "yql_opt_peephole_physical.h" +#include <ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h> +#include <ydb/library/yql/core/yql_atom_enums.h> #include <ydb/library/yql/core/yql_expr_optimize.h> #include <ydb/library/yql/core/yql_expr_type_annotation.h> #include <ydb/library/yql/core/yql_expr_constraint.h> @@ -7158,6 +7160,8 @@ struct TPeepHoleRules { {"CheckedDiv", &ExpandCheckedDiv}, {"CheckedMod", &ExpandCheckedMod}, {"CheckedMinus", &ExpandCheckedMinus}, + {"JsonValue", &ExpandJsonValue}, + {"JsonExists", &ExpandJsonExists} }; static constexpr std::initializer_list<TExtPeepHoleOptimizerMap::value_type> CommonStageExtRulesInit = { @@ -7168,6 +7172,7 @@ struct TPeepHoleRules { {"AggregateMergeFinalize", &ExpandAggregatePeephole}, {"AggregateMergeManyFinalize", &ExpandAggregatePeephole}, {"AggregateFinalize", &ExpandAggregatePeephole}, + {"JsonQuery", &ExpandJsonQuery}, }; static constexpr std::initializer_list<TPeepHoleOptimizerMap::value_type> SimplifyStageRulesInit = { diff --git a/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt index e97d638f7b4..b73b9d81828 100644 --- a/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt @@ -64,8 +64,6 @@ target_sources(ydb-services-ydb-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp 
${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp ) set_property( diff --git a/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt b/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt index d0b405d213e..017cd291baa 100644 --- a/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt @@ -67,8 +67,6 @@ target_sources(ydb-services-ydb-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp ) set_property( diff --git a/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt index f452cdf9321..77c88076457 100644 --- a/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt @@ -68,8 +68,6 @@ target_sources(ydb-services-ydb-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp ) set_property( diff --git a/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt index 05dffba80cc..64ee11bf384 100644 --- a/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt @@ -57,8 +57,6 @@ target_sources(ydb-services-ydb-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp - 
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp ) set_property( diff --git a/ydb/services/ydb/ut/re2_udf.cpp b/ydb/services/ydb/ut/re2_udf.cpp deleted file mode 100644 index e7e4cecd3ad..00000000000 --- a/ydb/services/ydb/ut/re2_udf.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// HACK: the TRe2Module class is in an anonymous namespace -// so including the source cpp is the only way to access it -#include <ydb/library/yql/udfs/common/re2/re2_udf.cpp> - -NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module() { - return new TRe2Module<true>(); -} diff --git a/ydb/services/ydb/ut/udfs.h b/ydb/services/ydb/ut/udfs.h deleted file mode 100644 index 612ab37da10..00000000000 --- a/ydb/services/ydb/ut/udfs.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once -#include <ydb/library/yql/public/udf/udf_registrator.h> - -NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module(); -NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module(); diff --git a/ydb/services/ydb/ydb_olapstore_ut.cpp b/ydb/services/ydb/ydb_olapstore_ut.cpp index 2fdced9f4a0..402b150b52c 100644 --- a/ydb/services/ydb/ydb_olapstore_ut.cpp +++ b/ydb/services/ydb/ydb_olapstore_ut.cpp @@ -1,6 +1,5 @@ #include "ydb_common_ut.h" -#include <ydb/services/ydb/ut/udfs.h> #include <ydb/core/kqp/ut/common/kqp_ut_common.h> #include <ydb/public/sdk/cpp/client/ydb_result/result.h> @@ -44,14 +43,6 @@ static constexpr const char* testShardingVariants[] = { Y_UNIT_TEST_SUITE(YdbOlapStore) { - NMiniKQL::IFunctionRegistry* UdfFrFactory(const NKikimr::NScheme::TTypeRegistry& typeRegistry) { - Y_UNUSED(typeRegistry); - auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone(); - funcRegistry->AddModule("fake_re2_path", "Re2", CreateRe2Module()); - funcRegistry->AddModule("fake_json2_path", "Json2", CreateJson2Module()); - return funcRegistry.Release(); - } - void EnableDebugLogs(TKikimrWithGrpcAndRootSchema& server) { 
server.Server_->GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_DEBUG); server.Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); @@ -483,7 +474,7 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { template<bool NotNull> void TestQuery(const TString& query, const TString& sharding) { NKikimrConfig::TAppConfig appConfig; - TKikimrWithGrpcAndRootSchema server(appConfig, {}, {}, false, &UdfFrFactory); + TKikimrWithGrpcAndRootSchema server(appConfig, {}, {}, false, nullptr); auto connection = ConnectToServer(server); |