aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author aidarsamer <aidarsamer@ydb.tech> 2023-05-12 17:28:33 +0300
committer aidarsamer <aidarsamer@ydb.tech> 2023-05-12 17:28:33 +0300
commit 9629a1cbefcb838e4801ad8a22944c8e81c8f0c9 (patch)
tree cdaea314bae5d64f904bd2834748ba5ac8b238c9
parent 0ab536455399f2da26b3e92917c0eda5436f0ae8 (diff)
download ydb-9629a1cbefcb838e4801ad8a22944c8e81c8f0c9.tar.gz
KIKIMr-15068: Move JSON_EXISTS and JSON_VALUE expand to UDFs to peephole optimizers
-rw-r--r-- ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt | 1
-rw-r--r-- ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt | 1
-rw-r--r-- ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt | 1
-rw-r--r-- ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt | 1
-rw-r--r-- ydb/core/kqp/ut/common/json2_udf.cpp (renamed from ydb/services/ydb/ut/json_udf.cpp) | 4
-rw-r--r-- ydb/core/kqp/ut/common/kqp_ut_common.cpp | 2
-rw-r--r-- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 38
-rw-r--r-- ydb/core/testlib/cs_helper.cpp | 60
-rw-r--r-- ydb/core/testlib/cs_helper.h | 2
-rw-r--r-- ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 665
-rw-r--r-- ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt | 1
-rw-r--r-- ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt | 1
-rw-r--r-- ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt | 1
-rw-r--r-- ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt | 1
-rw-r--r-- ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp | 676
-rw-r--r-- ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h | 11
-rw-r--r-- ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp | 5
-rw-r--r-- ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt | 2
-rw-r--r-- ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt | 2
-rw-r--r-- ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt | 2
-rw-r--r-- ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt | 2
-rw-r--r-- ydb/services/ydb/ut/re2_udf.cpp | 7
-rw-r--r-- ydb/services/ydb/ut/udfs.h | 5
-rw-r--r-- ydb/services/ydb/ydb_olapstore_ut.cpp | 11
24 files changed, 783 insertions, 719 deletions
diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
index ac4ece79a0a..0d26cb8fc26 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
@@ -26,6 +26,7 @@ target_link_libraries(kqp-ut-common PUBLIC
cpp-client-ydb_topic
)
target_sources(kqp-ut-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
index 84aa8d90e3b..dd0675d5bdf 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
@@ -27,6 +27,7 @@ target_link_libraries(kqp-ut-common PUBLIC
cpp-client-ydb_topic
)
target_sources(kqp-ut-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
index 84aa8d90e3b..dd0675d5bdf 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
@@ -27,6 +27,7 @@ target_link_libraries(kqp-ut-common PUBLIC
cpp-client-ydb_topic
)
target_sources(kqp-ut-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
index ac4ece79a0a..0d26cb8fc26 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
@@ -26,6 +26,7 @@ target_link_libraries(kqp-ut-common PUBLIC
cpp-client-ydb_topic
)
target_sources(kqp-ut-common PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/json2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
)
diff --git a/ydb/services/ydb/ut/json_udf.cpp b/ydb/core/kqp/ut/common/json2_udf.cpp
index 704eb5c9e47..2d6c08ab968 100644
--- a/ydb/services/ydb/ut/json_udf.cpp
+++ b/ydb/core/kqp/ut/common/json2_udf.cpp
@@ -1,5 +1,9 @@
#include <ydb/library/yql/udfs/common/json2/json2_udf.cpp>
+namespace NKikimr::NKqp {
+
NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module() {
return new NJson2Udf::TJson2Module();
}
+
+} // namespace NKikimr::NKqp
\ No newline at end of file
diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp
index 164122bac9a..788fae83fd8 100644
--- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp
+++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp
@@ -50,12 +50,14 @@ SIMPLE_UDF(TRandString, char*(ui32)) {
}
SIMPLE_MODULE(TTestUdfsModule, TTestFilter, TTestFilterTerminate, TRandString);
+NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module();
NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module();
NMiniKQL::IFunctionRegistry* UdfFrFactory(const NScheme::TTypeRegistry& typeRegistry) {
Y_UNUSED(typeRegistry);
auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone();
funcRegistry->AddModule("", "TestUdfs", new TTestUdfsModule());
+ funcRegistry->AddModule("", "Json2", CreateJson2Module());
funcRegistry->AddModule("", "Re2", CreateRe2Module());
NKikimr::NMiniKQL::FillStaticModules(*funcRegistry);
return funcRegistry.Release();
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index 3bd25b69ec2..258dd45e120 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -4438,6 +4438,44 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
TestTableWithNulls({ testCase });
}
+ Y_UNIT_TEST(Json_GetValue) {
+ TAggregationTestCase testCase;
+ testCase.SetQuery(R"(
+ SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
+ WHERE
+ level = 1;
+ )")
+ .SetExpectedReply(R"([[1;["val1"];#]])");
+
+ TestTableWithNulls({ testCase });
+ }
+
+ Y_UNIT_TEST(Json_Exists) {
+ TAggregationTestCase testCase;
+ testCase.SetQuery(R"(
+ SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
+ WHERE
+ level = 1;
+ )")
+ .SetExpectedReply(R"([[1;[%true];#]])");
+
+ TestTableWithNulls({ testCase });
+ }
+
+ Y_UNIT_TEST(Json_Query) {
+ TAggregationTestCase testCase;
+ testCase.SetQuery(R"(
+ SELECT id, JSON_QUERY(jsonval, "$.col1" WITH UNCONDITIONAL WRAPPER),
+ JSON_QUERY(jsondoc, "$.col1" WITH UNCONDITIONAL WRAPPER)
+ FROM `/Root/tableWithNulls`
+ WHERE
+ level = 1;
+ )")
+ .SetExpectedReply(R"([[1;["[\"val1\"]"];#]])");
+
+ TestTableWithNulls({ testCase });
+ }
+
Y_UNIT_TEST(Olap_InsertFails) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false)
diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp
index d3edd61a252..ae234a1859c 100644
--- a/ydb/core/testlib/cs_helper.cpp
+++ b/ydb/core/testlib/cs_helper.cpp
@@ -357,7 +357,9 @@ std::shared_ptr<arrow::Schema> TTableWithNullsHelper::GetArrowSchema() {
arrow::field("id", arrow::int32()),
arrow::field("resource_id", arrow::utf8()),
arrow::field("level", arrow::int32()),
- arrow::field("binary_str", arrow::binary())
+ arrow::field("binary_str", arrow::binary()),
+ arrow::field("jsonval", arrow::utf8()),
+ arrow::field("jsondoc", arrow::binary())
});
}
@@ -369,36 +371,46 @@ std::shared_ptr<arrow::RecordBatch> TTableWithNullsHelper::TestArrowBatch(ui64,
rowCount = 10;
std::shared_ptr<arrow::Schema> schema = GetArrowSchema();
- arrow::Int32Builder b1;
- arrow::StringBuilder b2;
- arrow::Int32Builder b3;
- arrow::StringBuilder b4;
+ arrow::Int32Builder bId;
+ arrow::StringBuilder bResourceId;
+ arrow::Int32Builder bLevel;
+ arrow::StringBuilder bBinaryStr;
+ arrow::StringBuilder bJsonVal;
+ arrow::StringBuilder bJsonDoc;
for (size_t i = 1; i <= rowCount / 2; ++i) {
- Y_VERIFY(b1.Append(i).ok());
- Y_VERIFY(b2.AppendNull().ok());
- Y_VERIFY(b3.Append(i).ok());
- Y_VERIFY(b4.AppendNull().ok());
+ Y_VERIFY(bId.Append(i).ok());
+ Y_VERIFY(bResourceId.AppendNull().ok());
+ Y_VERIFY(bLevel.Append(i).ok());
+ Y_VERIFY(bBinaryStr.AppendNull().ok());
+ Y_VERIFY(bJsonVal.Append(std::string(R"({"col1": "val1", "obj": {"obj_col2": "val2"}})")).ok());
+ Y_VERIFY(bJsonDoc.AppendNull().ok());
}
for (size_t i = rowCount / 2 + 1; i <= rowCount; ++i) {
- Y_VERIFY(b1.Append(i).ok());
- Y_VERIFY(b2.Append(std::to_string(i)).ok());
- Y_VERIFY(b3.AppendNull().ok());
- Y_VERIFY(b4.Append(std::to_string(i)).ok());
+ Y_VERIFY(bId.Append(i).ok());
+ Y_VERIFY(bResourceId.Append(std::to_string(i)).ok());
+ Y_VERIFY(bLevel.AppendNull().ok());
+ Y_VERIFY(bBinaryStr.Append(std::to_string(i)).ok());
+ Y_VERIFY(bJsonVal.AppendNull().ok());
+ Y_VERIFY(bJsonDoc.Append(std::string(R"({"col1": "val1", "obj": {"obj_col2": "val2"}})")).ok());
}
- std::shared_ptr<arrow::Int32Array> a1;
- std::shared_ptr<arrow::StringArray> a2;
- std::shared_ptr<arrow::Int32Array> a3;
- std::shared_ptr<arrow::StringArray> a4;
-
- Y_VERIFY(b1.Finish(&a1).ok());
- Y_VERIFY(b2.Finish(&a2).ok());
- Y_VERIFY(b3.Finish(&a3).ok());
- Y_VERIFY(b4.Finish(&a4).ok());
-
- return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4 });
+ std::shared_ptr<arrow::Int32Array> aId;
+ std::shared_ptr<arrow::StringArray> aResourceId;
+ std::shared_ptr<arrow::Int32Array> aLevel;
+ std::shared_ptr<arrow::StringArray> aBinaryStr;
+ std::shared_ptr<arrow::StringArray> aJsonVal;
+ std::shared_ptr<arrow::StringArray> aJsonDoc;
+
+ Y_VERIFY(bId.Finish(&aId).ok());
+ Y_VERIFY(bResourceId.Finish(&aResourceId).ok());
+ Y_VERIFY(bLevel.Finish(&aLevel).ok());
+ Y_VERIFY(bBinaryStr.Finish(&aBinaryStr).ok());
+ Y_VERIFY(bJsonVal.Finish(&aJsonVal).ok());
+ Y_VERIFY(bJsonDoc.Finish(&aJsonDoc).ok());
+
+ return arrow::RecordBatch::Make(schema, rowCount, { aId, aResourceId, aLevel, aBinaryStr, aJsonVal, aJsonDoc });
}
}
diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h
index cc76cb33976..f2022674160 100644
--- a/ydb/core/testlib/cs_helper.h
+++ b/ydb/core/testlib/cs_helper.h
@@ -183,6 +183,8 @@ public:
Columns { Name: "resource_id" Type: "Utf8" }
Columns { Name: "level" Type: "Int32" }
Columns { Name: "binary_str" Type: "String" }
+ Columns { Name: "jsonval" Type: "Json" }
+ Columns { Name: "jsondoc" Type: "JsonDocument" }
KeyColumnNames: "id"
)";
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
index 189d86adfa5..524a38a3f31 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
@@ -3200,113 +3200,6 @@ TExprNode::TPtr FoldJsonSeralizeAfterParse(const TExprNode::TPtr& node) {
return FoldSeralizeAfterParse(node, "Json2.Parse", "Json2.Serialize");
}
-TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) {
- auto jsonPos = jsonExpr->Pos();
-
- auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TDataExprType>(EDataSlot::Json),
- });
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- argumentsType,
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- auto parse = Build<TCoUdf>(ctx, jsonPos)
- .MethodName()
- .Build("Json2.Parse")
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- return Build<TCoApply>(ctx, jsonPos)
- .Callable(parse)
- .FreeArgs()
- .Add(jsonExpr)
- .Build()
- .Done().Ptr();
-}
-
-TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) {
- const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn();
- if (type->GetKind() == ETypeAnnotationKind::Optional) {
- type = type->Cast<TOptionalExprType>()->GetItemType();
- }
- argumentDataSlot = type->Cast<TDataExprType>()->GetSlot();
-
- // If jsonExpr has JsonDocument type, there is no need to parse it
- if (argumentDataSlot == EDataSlot::JsonDocument) {
- return jsonExpr;
- }
-
- // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse
- return BuildJsonParse(jsonExpr, ctx);
-}
-
-TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) {
- return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot);
-}
-
-TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) {
- auto resourcePos = resourceExpr->Pos();
-
- auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")),
- });
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- argumentsType,
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- auto parse = Build<TCoUdf>(ctx, resourcePos)
- .MethodName()
- .Build("Json2.Serialize")
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- return Build<TCoApply>(ctx, resourcePos)
- .Callable(parse)
- .FreeArgs()
- .Add(resourceExpr)
- .Build()
- .Done().Ptr();
-}
-
-TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) {
- auto jsonPathPos = jsonExpr.JsonPath().Pos();
-
- auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TDataExprType>(EDataSlot::Utf8)
- });
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- argumentsType,
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- auto compilePath = Build<TCoUdf>(ctx, jsonPathPos)
- .MethodName()
- .Build("Json2.CompilePath")
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- return Build<TCoApply>(ctx, jsonPathPos)
- .Callable(compilePath)
- .FreeArgs()
- .Add(jsonExpr.JsonPath())
- .Build()
- .Done().Ptr();
-}
-
template<bool Ordered>
TExprNode::TPtr CanonizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx) {
if constexpr (Ordered) {
@@ -5576,560 +5469,6 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
return node;
};
- map["JsonValue"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
- /*
- Here we rewrite expression
- JSON_VALUE(
- <json>, <jsonPath>
- [PASSING <variableExpr1> AS <variableName1>, ...]
- [RETURNING <resultType>]
- [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY]
- [(NULL | DEFAULT <onErrorExpr>) ON ERROR]
- )
- Generated depends on the <resultType> specified in RETURNING section:
- 1. No RETURNING section
- Default returning type of JsonValue is Utf8 and it must convert
- result of JsonPath expression into Utf8 string.
- Json2::SqlValueConvertToUtf8 is used
- 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.)
- Json2::SqlValueNumber is used with additional CAST to corresponding type
- 3. <resultType> is a date type (Date, Datetime, Timestamp)
- Json2::SqlValueInt64 is used with additional CAST to corresponding type
- 4. <resultType> is Bool
- Json2::SqlValueBool is used
- 5. <resultType> is String
- Json2::SqlValueUtf8 is used with additional CAST to String
- 6. <resultType> is Utf8
- Json2::SqlValueUtf8 is used
- Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>:
- 1. If variant holds first type, either error happened or the result is empty.
- If first tuple element is 0, result is empty.
- If first tuple element is 1, error happened.
- Second tuple element contains message that can be displayed to the user.
- 2. If variant hold second type, execution was successful and it is a result.
- We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type.
- Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So:
- 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>)
- 2. Otherwise we check the result of SafeCast callable. If it is NULL, cast has failed and it is an error.
- If it holds some value, we return it to the user.
- If no CAST is needed, we just return the result of Json2::SqlValue*.
- What is more, <onEmptyExpr> and <onErrorExpr> must be casted to <resultType> and this CAST can fail too.
- ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to
- target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception.
-
- I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea
- of a way to handle all this ***, please write to laplab@.
- */
- TCoJsonValue jsonValue(node);
-
- // <json expr> or Json2::Parse(<json expr>)
- EDataSlot jsonDataSlot;
- TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot);
-
- // Json2::CompilePath(<jsonPath>)
- TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx);
-
- // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>)
- TExprNode::TPtr sqlValueExpr;
- const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>();
- const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot();
- bool needCast = false;
- const auto jsonValuePos = jsonValue.Pos();
- {
- TString sqlValueUdfName;
- if (IsDataTypeNumeric(unwrappedSlot)) {
- sqlValueUdfName = "SqlValueNumber";
- needCast = true;
- } else if (IsDataTypeDate(unwrappedSlot)) {
- sqlValueUdfName = "SqlValueInt64";
- needCast = true;
- } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) {
- if (jsonValue.ReturningType()) {
- sqlValueUdfName = "SqlValueUtf8";
- } else {
- sqlValueUdfName = "SqlValueConvertToUtf8";
- }
- needCast = unwrappedSlot == EDataSlot::String;
- } else if (unwrappedSlot == EDataSlot::Bool) {
- sqlValueUdfName = "SqlValueBool";
- } else {
- YQL_ENSURE(false, "Unsupported type");
- }
-
- const TTypeAnnotationNode* inputType = nullptr;
- if (jsonDataSlot == EDataSlot::JsonDocument) {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
- sqlValueUdfName = "JsonDocument" + sqlValueUdfName;
- } else {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
- }
- sqlValueUdfName = "Json2." + sqlValueUdfName;
-
- TTypeAnnotationNode::TListType arguments = {
- inputType,
- ctx.MakeType<TResourceExprType>("JsonPath")
- };
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TTupleExprType>(arguments),
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos)
- .MethodName()
- .Build(sqlValueUdfName)
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos)
- .Callable(sqlValue)
- .FreeArgs()
- .Add(jsonExpr)
- .Add(compilePathExpr)
- .Add(jsonValue.Variables())
- .Build()
- .Done().Ptr();
- }
-
- auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) {
- /*
- if Exists($source)
- then
- return IfPresent(
- CAST($source as <resultType>),
- ($x) -> { return Just($x); },
- $onCastFail
- )
- else
- return Nothing(<resultType>)
- */
- TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx);
- return Build<TCoIf>(ctx, pos)
- .Predicate<TCoExists>()
- .Optional(source)
- .Build()
- .ThenValue<TCoIfPresent>()
- .Optional<TCoSafeCast>()
- .Value(source)
- .Type(returnTypeNode)
- .Build()
- .PresentHandler<TCoLambda>()
- .Args({"unwrappedValue"})
- .Body<TCoJust>()
- .Input("unwrappedValue")
- .Build()
- .Build()
- .MissingValue(onCastFail)
- .Build()
- .ElseValue<TCoNothing>()
- .OptionalType(returnTypeNode)
- .Build()
- .Done().Ptr();
- };
-
- auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) {
- return Build<TCoEnsure>(ctx, pos)
- .Value<TCoNothing>()
- .OptionalType(ExpandType(pos, *returnTypeAnn, ctx))
- .Build()
- .Predicate<TCoBool>()
- .Literal()
- .Build("false")
- .Build()
- .Message(message)
- .Done().Ptr();
- };
-
- auto makeHandler = [&](EJsonValueHandlerMode mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr {
- const auto pos = node->Pos();
- if (mode == EJsonValueHandlerMode::Error) {
- return makeThrow(pos, errorMessage);
- }
-
- // Convert NULL to Nothing(<resultType>)
- if (IsNull(*node)) {
- return Build<TCoNothing>(ctx, pos)
- .OptionalType(ExpandType(pos, *returnTypeAnn, ctx))
- .Done().Ptr();
- }
-
- // If type is not Optional, wrap expression in Just call
- TExprNode::TPtr result = node;
- const auto typeAnn = node->GetTypeAnn();
- if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) {
- result = Build<TCoJust>(ctx, pos)
- .Input(result)
- .Done().Ptr();
- }
-
- // Perform CAST to <resultType> or return onCastFail
- return makeCastOrValue(pos, result, onCastFail);
- };
-
- const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content());
- const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content());
- auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) {
- const auto onError = jsonValue.OnError();
- const auto throwCastError = makeThrow(
- onError.Pos(),
- Build<TCoString>(ctx, onError.Pos())
- .Literal()
- .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn))
- .Done().Ptr()
- );
-
- return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError);
- };
- auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) {
- const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos())
- .Literal()
- .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn))
- .Done().Ptr();
- return makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError));
- };
-
- /*
- Lambda for handling first type of variant
-
- ($errorTuple) -> {
- if $errorTuple[0] == 0
- then
- return onEmptyHandler
- else
- return onErrorHandler
- }
- */
- auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple");
- auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos)
- .Tuple(errorTupleArgument)
- .Index()
- .Build("1")
- .Done().Ptr();
- const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos)
- .Args(TExprNode::TListType{errorTupleArgument})
- .Body<TCoIf>()
- .Predicate<TCoCmpEqual>()
- .Left<TCoNth>()
- .Tuple(errorTupleArgument)
- .Index()
- .Build("0")
- .Build()
- .Right<TCoUint8>()
- .Literal()
- .Build("0")
- .Build()
- .Build()
- .ThenValue(makeOnEmptyHandler(sqlValueMessage))
- .ElseValue(makeOnErrorHandler(sqlValueMessage))
- .Build()
- .Done().Ptr();
-
- // Lambda for handling second type of variant
- TExprNode::TPtr sqlValueResultLambda;
- if (needCast) {
- const auto errorMessage = Build<TCoString>(ctx, jsonValuePos)
- .Literal()
- .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn))
- .Done().Ptr();
- const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult");
- sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos)
- .Args(TExprNode::TListType{inputArgument})
- .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage)))
- .Done().Ptr();
- } else {
- /*
- ($sqlValueResult) -> {
- return $sqlValueResult;
- }
- */
- sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos)
- .Args({"sqlValueResult"})
- .Body("sqlValueResult")
- .Done().Ptr();
- }
-
- // Visit call to get the result
- const auto visitResult = Build<TCoVisit>(ctx, jsonValuePos)
- .Input(sqlValueExpr)
- .FreeArgs()
- .Add<TCoAtom>()
- .Build("0")
- .Add(errorLambda)
- .Add<TCoAtom>()
- .Build("1")
- .Add(sqlValueResultLambda)
- .Build()
- .Done().Ptr();
-
- return visitResult;
- };
-
- map["JsonExists"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
- /*
- Here we rewrite expression
- JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR)
- into
- Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>)
- and its sibling
- JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR)
- into
- Json2::SqlTryExists(Json2::Parse(<json expr>), <dict with variables>, Json2::CompilePath(<jsonpath>))
- */
- TCoJsonExists jsonExists(node);
-
- // <json expr> or Json2::Parse(<json expr>)
- EDataSlot jsonDataSlot;
- TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot);
-
- // Json2::CompilePath(<jsonPath>)
- TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx);
-
- // Json2::SqlExists(<json>, <compiled jsonpath>, [<default value>])
- // or
- // Json2::SqlTryExists(<json>, <compiled jsonpath>)
- const bool needThrow = !jsonExists.OnError().IsValid();
-
- TString sqlExistsUdfName = "SqlExists";
- if (needThrow) {
- sqlExistsUdfName = "SqlTryExists";
- }
-
- const TTypeAnnotationNode* inputType = nullptr;
- if (jsonDataSlot == EDataSlot::JsonDocument) {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
- sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName;
- } else {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
- }
- sqlExistsUdfName = "Json2." + sqlExistsUdfName;
-
- TTypeAnnotationNode::TListType arguments = {
- inputType,
- ctx.MakeType<TResourceExprType>("JsonPath")
- };
-
- if (!needThrow) {
- const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool);
- const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType);
- arguments.push_back(optionalBoolType);
- }
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TTupleExprType>(arguments),
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- const auto jsonExistsPos = jsonExists.Pos();
- auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos)
- .MethodName()
- .Build(sqlExistsUdfName)
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- if (needThrow) {
- return Build<TCoApply>(ctx, jsonExistsPos)
- .Callable(sqlExists)
- .FreeArgs()
- .Add(parseJsonExpr)
- .Add(compilePathExpr)
- .Add(jsonExists.Variables())
- .Build()
- .Done().Ptr();
- }
-
- return Build<TCoApply>(ctx, jsonExistsPos)
- .Callable(sqlExists)
- .FreeArgs()
- .Add(parseJsonExpr)
- .Add(compilePathExpr)
- .Add(jsonExists.Variables())
- .Add(jsonExists.OnError().Cast())
- .Build()
- .Done().Ptr();
- };
-
- map["JsonQuery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
- /*
- Here we rewrite expression
- JSON_QUERY(
- <json expr>,
- <jsonpath>
- [PASSING <variableExpr1> AS <variableName1>, ...]
- [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER]
- [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY]
- [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR]
- )
- into something like
- Json2::SqlQuery...(
- Json2::Parse(<json expr>),
- Json2::CompilePath(<jsonpath>),
- <dict with variables>,
- <do we have ERROR ON EMPTY?>,
- <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>,
- <do we have ERROR ON ERROR?>,
- <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR>
- )
- Exact UDF name is choosen depending on wrap config:
- - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery
- - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap
- - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap
- */
- TCoJsonQuery jsonQuery(node);
-
- // <json expr> or Json2::Parse(<json expr>)
- EDataSlot jsonDataSlot;
- TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot);
-
- // Json2::CompilePath(<jsonPath>)
- TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx);
-
- // Json2::SqlQuery...(<json expr>, <jsonpath>, ...)
- const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content());
- TString sqlQueryUdfName = "SqlQuery";
- switch (wrapMode) {
- case EJsonQueryWrap::NoWrap:
- sqlQueryUdfName = "SqlQuery";
- break;
- case EJsonQueryWrap::Wrap:
- sqlQueryUdfName = "SqlQueryWrap";
- break;
- case EJsonQueryWrap::ConditionalWrap:
- sqlQueryUdfName = "SqlQueryConditionalWrap";
- break;
- }
-
- const TTypeAnnotationNode* inputType = nullptr;
- if (jsonDataSlot == EDataSlot::JsonDocument) {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
- sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName;
- } else {
- inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
- }
- inputType = ctx.MakeType<TOptionalExprType>(inputType);
- sqlQueryUdfName = "Json2." + sqlQueryUdfName;
-
- const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
- TTypeAnnotationNode::TListType arguments{
- inputType,
- ctx.MakeType<TResourceExprType>("JsonPath"),
- ctx.MakeType<TDataExprType>(EDataSlot::Bool),
- optionalJsonResourceType,
- ctx.MakeType<TDataExprType>(EDataSlot::Bool),
- optionalJsonResourceType,
- };
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TTupleExprType>(arguments),
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) {
- return Build<TCoBool>(ctx, pos)
- .Literal()
- .Build(handler == EJsonQueryHandler::Error ? "true" : "false")
- .Done().Ptr();
- };
-
- auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) {
- switch (handler) {
- case EJsonQueryHandler::Error:
- case EJsonQueryHandler::Null: {
- // Nothing(Resource<JsonNode>)
- return Build<TCoNothing>(ctx, pos)
- .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx))
- .Done().Ptr();
- }
- case EJsonQueryHandler::EmptyArray: {
- auto value = Build<TCoJson>(ctx, pos)
- .Literal()
- .Build("[]")
- .Done().Ptr();
- return BuildJsonParse(value, ctx);
- }
- case EJsonQueryHandler::EmptyObject: {
- auto value = Build<TCoJson>(ctx, pos)
- .Literal()
- .Build("{}")
- .Done().Ptr();
- return BuildJsonParse(value, ctx);
- }
- }
- };
-
- const auto jsonQueryPos = jsonQuery.Pos();
- auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos)
- .MethodName()
- .Build(sqlQueryUdfName)
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content());
- const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content());
- const auto onEmptyPos = jsonQuery.OnEmpty().Pos();
- const auto onErrorPos = jsonQuery.OnError().Pos();
-
- auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos)
- .Callable(sqlQuery)
- .FreeArgs()
- .Add(parseJsonExpr)
- .Add(compilePathExpr)
- .Add(jsonQuery.Variables())
- .Add(buildShouldThrow(onEmpty, onEmptyPos))
- .Add(buildHandler(onEmpty, onEmptyPos))
- .Add(buildShouldThrow(onError, onErrorPos))
- .Add(buildHandler(onError, onErrorPos))
- .Build()
- .Done().Ptr();
-
- // In this case we need to serialize Resource<JsonNode> to Json type
- if (!optCtx.Types->JsonQueryReturnsJsonDocument) {
- return BuildJsonSerialize(sqlQueryApply, ctx);
- }
-
- // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument
- {
- auto resourcePos = sqlQueryApply->Pos();
-
- auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")),
- });
-
- auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
- argumentsType,
- ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
- ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
- });
-
- TStringBuf serializeUdfName = "Json2.Serialize";
- if (optCtx.Types->JsonQueryReturnsJsonDocument) {
- serializeUdfName = "Json2.SerializeToJsonDocument";
- }
- auto parse = Build<TCoUdf>(ctx, resourcePos)
- .MethodName()
- .Build(serializeUdfName)
- .RunConfigValue<TCoVoid>()
- .Build()
- .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx))
- .Done().Ptr();
-
- return Build<TCoApply>(ctx, resourcePos)
- .Callable(parse)
- .FreeArgs()
- .Add(sqlQueryApply)
- .Build()
- .Done().Ptr();
- }
- };
-
map["JsonVariables"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
/*
Here we rewrite expression
@@ -6803,8 +6142,4 @@ TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap,
return {};
}
-TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) {
- return BuildJsonParse(jsonExpr.Json().Ptr(), ctx);
-}
-
} // namespace NYql
diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt
index fa54b0a08b3..294ed16d9b6 100644
--- a/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/core/peephole_opt/CMakeLists.darwin-x86_64.txt
@@ -19,5 +19,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC
yql-core-type_ann
)
target_sources(yql-core-peephole_opt PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
)
diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt
index 1bbb34cbc46..36a6bbe0897 100644
--- a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-aarch64.txt
@@ -20,5 +20,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC
yql-core-type_ann
)
target_sources(yql-core-peephole_opt PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
)
diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt
index 1bbb34cbc46..36a6bbe0897 100644
--- a/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/core/peephole_opt/CMakeLists.linux-x86_64.txt
@@ -20,5 +20,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC
yql-core-type_ann
)
target_sources(yql-core-peephole_opt PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
)
diff --git a/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt
index fa54b0a08b3..294ed16d9b6 100644
--- a/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/core/peephole_opt/CMakeLists.windows-x86_64.txt
@@ -19,5 +19,6 @@ target_link_libraries(yql-core-peephole_opt PUBLIC
yql-core-type_ann
)
target_sources(yql-core-peephole_opt PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
)
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
new file mode 100644
index 00000000000..b778624b9da
--- /dev/null
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.cpp
@@ -0,0 +1,676 @@
+#include "yql_opt_json_peephole_physical.h"
+
+#include <ydb/library/yql/core/yql_atom_enums.h>
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+namespace NYql {
+
+namespace {
+
+using namespace NNodes;
+
+TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) {
+ auto jsonPos = jsonExpr->Pos();
+
+ auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TDataExprType>(EDataSlot::Json),
+ });
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ argumentsType,
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ auto parse = Build<TCoUdf>(ctx, jsonPos)
+ .MethodName()
+ .Build("Json2.Parse")
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ return Build<TCoApply>(ctx, jsonPos)
+ .Callable(parse)
+ .FreeArgs()
+ .Add(jsonExpr)
+ .Build()
+ .Done().Ptr();
+}
+
+TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) {
+ const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn();
+ if (type->GetKind() == ETypeAnnotationKind::Optional) {
+ type = type->Cast<TOptionalExprType>()->GetItemType();
+ }
+ argumentDataSlot = type->Cast<TDataExprType>()->GetSlot();
+
+ // If jsonExpr has JsonDocument type, there is no need to parse it
+ if (argumentDataSlot == EDataSlot::JsonDocument) {
+ return jsonExpr;
+ }
+
+ // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse
+ return BuildJsonParse(jsonExpr, ctx);
+}
+
+TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) {
+ return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot);
+}
+
+TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) {
+ auto jsonPathPos = jsonExpr.JsonPath().Pos();
+
+ auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TDataExprType>(EDataSlot::Utf8)
+ });
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ argumentsType,
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ auto compilePath = Build<TCoUdf>(ctx, jsonPathPos)
+ .MethodName()
+ .Build("Json2.CompilePath")
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ return Build<TCoApply>(ctx, jsonPathPos)
+ .Callable(compilePath)
+ .FreeArgs()
+ .Add(jsonExpr.JsonPath())
+ .Build()
+ .Done().Ptr();
+}
+
+TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) {
+ auto resourcePos = resourceExpr->Pos();
+
+ auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")),
+ });
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ argumentsType,
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ auto parse = Build<TCoUdf>(ctx, resourcePos)
+ .MethodName()
+ .Build("Json2.Serialize")
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ return Build<TCoApply>(ctx, resourcePos)
+ .Callable(parse)
+ .FreeArgs()
+ .Add(resourceExpr)
+ .Build()
+ .Done().Ptr();
+}
+
+} // namespace
+
+
+TExprNode::TPtr ExpandJsonValue(const TExprNode::TPtr& node, TExprContext& ctx) {
+ /*
+ Here we rewrite expression
+ JSON_VALUE(
+ <json>, <jsonPath>
+ [PASSING <variableExpr1> AS <variableName1>, ...]
+ [RETURNING <resultType>]
+ [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY]
+ [(NULL | DEFAULT <onErrorExpr>) ON ERROR]
+ )
+ Generated code depends on the <resultType> specified in the RETURNING section:
+ 1. No RETURNING section
+ Default returning type of JsonValue is Utf8 and it must convert
+ result of JsonPath expression into Utf8 string.
+ Json2::SqlValueConvertToUtf8 is used
+ 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.)
+ Json2::SqlValueNumber is used with additional CAST to corresponding type
+ 3. <resultType> is a date type (Date, Datetime, Timestamp)
+ Json2::SqlValueInt64 is used with additional CAST to corresponding type
+ 4. <resultType> is Bool
+ Json2::SqlValueBool is used
+ 5. <resultType> is String
+ Json2::SqlValueUtf8 is used with additional CAST to String
+ 6. <resultType> is Utf8
+ Json2::SqlValueUtf8 is used
+ Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>:
+ 1. If variant holds first type, either error happened or the result is empty.
+ If first tuple element is 0, result is empty.
+ If first tuple element is 1, error happened.
+ Second tuple element contains message that can be displayed to the user.
+ 2. If variant holds the second type, execution was successful and it is the result.
+ We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type.
+ Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So:
+ 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>)
+ 2. Otherwise we check the result of SafeCast callable. If it is NULL, cast has failed and it is an error.
+ If it holds some value, we return it to the user.
+ If no CAST is needed, we just return the result of Json2::SqlValue*.
+ What is more, <onEmptyExpr> and <onErrorExpr> must be cast to <resultType> and this CAST can fail too.
+ ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to
+ target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception.
+
+ I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea
+ of a way to handle all this ***, please write to laplab@.
+ */
+ TCoJsonValue jsonValue(node);
+
+ // <json expr> or Json2::Parse(<json expr>)
+ EDataSlot jsonDataSlot;
+ TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot);
+
+ // Json2::CompilePath(<jsonPath>)
+ TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx);
+
+ // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>)
+ TExprNode::TPtr sqlValueExpr;
+ const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>();
+ const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot();
+ bool needCast = false;
+ const auto jsonValuePos = jsonValue.Pos();
+ {
+ TString sqlValueUdfName;
+ if (IsDataTypeNumeric(unwrappedSlot)) {
+ sqlValueUdfName = "SqlValueNumber";
+ needCast = true;
+ } else if (IsDataTypeDate(unwrappedSlot)) {
+ sqlValueUdfName = "SqlValueInt64";
+ needCast = true;
+ } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) {
+ if (jsonValue.ReturningType()) {
+ sqlValueUdfName = "SqlValueUtf8";
+ } else {
+ sqlValueUdfName = "SqlValueConvertToUtf8";
+ }
+ needCast = unwrappedSlot == EDataSlot::String;
+ } else if (unwrappedSlot == EDataSlot::Bool) {
+ sqlValueUdfName = "SqlValueBool";
+ } else {
+ YQL_ENSURE(false, "Unsupported type");
+ }
+
+ const TTypeAnnotationNode* inputType = nullptr;
+ if (jsonDataSlot == EDataSlot::JsonDocument) {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
+ sqlValueUdfName = "JsonDocument" + sqlValueUdfName;
+ } else {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
+ }
+ sqlValueUdfName = "Json2." + sqlValueUdfName;
+
+ TTypeAnnotationNode::TListType arguments = {
+ inputType,
+ ctx.MakeType<TResourceExprType>("JsonPath")
+ };
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TTupleExprType>(arguments),
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos)
+ .MethodName()
+ .Build(sqlValueUdfName)
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos)
+ .Callable(sqlValue)
+ .FreeArgs()
+ .Add(jsonExpr)
+ .Add(compilePathExpr)
+ .Add(jsonValue.Variables())
+ .Build()
+ .Done().Ptr();
+ }
+
+ auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) {
+ /*
+ if Exists($source)
+ then
+ return IfPresent(
+ CAST($source as <resultType>),
+ ($x) -> { return Just($x); },
+ $onCastFail
+ )
+ else
+ return Nothing(<resultType>)
+ */
+ TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx);
+ return Build<TCoIf>(ctx, pos)
+ .Predicate<TCoExists>()
+ .Optional(source)
+ .Build()
+ .ThenValue<TCoIfPresent>()
+ .Optional<TCoSafeCast>()
+ .Value(source)
+ .Type(returnTypeNode)
+ .Build()
+ .PresentHandler<TCoLambda>()
+ .Args({"unwrappedValue"})
+ .Body<TCoJust>()
+ .Input("unwrappedValue")
+ .Build()
+ .Build()
+ .MissingValue(onCastFail)
+ .Build()
+ .ElseValue<TCoNothing>()
+ .OptionalType(returnTypeNode)
+ .Build()
+ .Done().Ptr();
+ };
+
+ auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) {
+ return Build<TCoEnsure>(ctx, pos)
+ .Value<TCoNothing>()
+ .OptionalType(ExpandType(pos, *returnTypeAnn, ctx))
+ .Build()
+ .Predicate<TCoBool>()
+ .Literal()
+ .Build("false")
+ .Build()
+ .Message(message)
+ .Done().Ptr();
+ };
+
+ auto makeHandler = [&](EJsonValueHandlerMode mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr {
+ const auto pos = node->Pos();
+ if (mode == EJsonValueHandlerMode::Error) {
+ return makeThrow(pos, errorMessage);
+ }
+
+ // Convert NULL to Nothing(<resultType>)
+ if (IsNull(*node)) {
+ return Build<TCoNothing>(ctx, pos)
+ .OptionalType(ExpandType(pos, *returnTypeAnn, ctx))
+ .Done().Ptr();
+ }
+
+ // If type is not Optional, wrap expression in Just call
+ TExprNode::TPtr result = node;
+ const auto typeAnn = node->GetTypeAnn();
+ if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) {
+ result = Build<TCoJust>(ctx, pos)
+ .Input(result)
+ .Done().Ptr();
+ }
+
+ // Perform CAST to <resultType> or return onCastFail
+ return makeCastOrValue(pos, result, onCastFail);
+ };
+
+ const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content());
+ const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content());
+ auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) {
+ const auto onError = jsonValue.OnError();
+ const auto throwCastError = makeThrow(
+ onError.Pos(),
+ Build<TCoString>(ctx, onError.Pos())
+ .Literal()
+ .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn))
+ .Done().Ptr()
+ );
+
+ return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError);
+ };
+ auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) {
+ const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos())
+ .Literal()
+ .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn))
+ .Done().Ptr();
+ return makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError));
+ };
+
+ /*
+ Lambda for handling first type of variant
+
+ ($errorTuple) -> {
+ if $errorTuple[0] == 0
+ then
+ return onEmptyHandler
+ else
+ return onErrorHandler
+ }
+ */
+ auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple");
+ auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos)
+ .Tuple(errorTupleArgument)
+ .Index()
+ .Build("1")
+ .Done().Ptr();
+ const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos)
+ .Args(TExprNode::TListType{errorTupleArgument})
+ .Body<TCoIf>()
+ .Predicate<TCoCmpEqual>()
+ .Left<TCoNth>()
+ .Tuple(errorTupleArgument)
+ .Index()
+ .Build("0")
+ .Build()
+ .Right<TCoUint8>()
+ .Literal()
+ .Build("0")
+ .Build()
+ .Build()
+ .ThenValue(makeOnEmptyHandler(sqlValueMessage))
+ .ElseValue(makeOnErrorHandler(sqlValueMessage))
+ .Build()
+ .Done().Ptr();
+
+ // Lambda for handling second type of variant
+ TExprNode::TPtr sqlValueResultLambda;
+ if (needCast) {
+ const auto errorMessage = Build<TCoString>(ctx, jsonValuePos)
+ .Literal()
+ .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn))
+ .Done().Ptr();
+ const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult");
+ sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos)
+ .Args(TExprNode::TListType{inputArgument})
+ .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage)))
+ .Done().Ptr();
+ } else {
+ /*
+ ($sqlValueResult) -> {
+ return $sqlValueResult;
+ }
+ */
+ sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos)
+ .Args({"sqlValueResult"})
+ .Body("sqlValueResult")
+ .Done().Ptr();
+ }
+
+ // Visit call to get the result
+ const auto visitResult = Build<TCoVisit>(ctx, jsonValuePos)
+ .Input(sqlValueExpr)
+ .FreeArgs()
+ .Add<TCoAtom>()
+ .Build("0")
+ .Add(errorLambda)
+ .Add<TCoAtom>()
+ .Build("1")
+ .Add(sqlValueResultLambda)
+ .Build()
+ .Done().Ptr();
+
+ return visitResult;
+}
+
+TExprNode::TPtr ExpandJsonExists(const TExprNode::TPtr& node, TExprContext& ctx) {
+ /*
+ Here we rewrite expression
+ JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR)
+ into
+ Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>)
+ and its sibling
+ JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR)
+ into
+ Json2::SqlTryExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>)
+ */
+ TCoJsonExists jsonExists(node);
+
+ // <json expr> or Json2::Parse(<json expr>)
+ EDataSlot jsonDataSlot;
+ TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot);
+
+ // Json2::CompilePath(<jsonPath>)
+ TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx);
+
+ // Json2::SqlExists(<json>, <compiled jsonpath>, <dict with variables>, <on error value>)
+ // or
+ // Json2::SqlTryExists(<json>, <compiled jsonpath>, <dict with variables>)
+ const bool needThrow = !jsonExists.OnError().IsValid();
+
+ TString sqlExistsUdfName = "SqlExists";
+ if (needThrow) {
+ sqlExistsUdfName = "SqlTryExists";
+ }
+
+ const TTypeAnnotationNode* inputType = nullptr;
+ if (jsonDataSlot == EDataSlot::JsonDocument) {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
+ sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName;
+ } else {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
+ }
+ sqlExistsUdfName = "Json2." + sqlExistsUdfName;
+
+ TTypeAnnotationNode::TListType arguments = {
+ inputType,
+ ctx.MakeType<TResourceExprType>("JsonPath")
+ };
+
+ if (!needThrow) {
+ const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool);
+ const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType);
+ arguments.push_back(optionalBoolType);
+ }
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TTupleExprType>(arguments),
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ const auto jsonExistsPos = jsonExists.Pos();
+ auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos)
+ .MethodName()
+ .Build(sqlExistsUdfName)
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ if (needThrow) {
+ return Build<TCoApply>(ctx, jsonExistsPos)
+ .Callable(sqlExists)
+ .FreeArgs()
+ .Add(parseJsonExpr)
+ .Add(compilePathExpr)
+ .Add(jsonExists.Variables())
+ .Build()
+ .Done().Ptr();
+ }
+
+ return Build<TCoApply>(ctx, jsonExistsPos)
+ .Callable(sqlExists)
+ .FreeArgs()
+ .Add(parseJsonExpr)
+ .Add(compilePathExpr)
+ .Add(jsonExists.Variables())
+ .Add(jsonExists.OnError().Cast())
+ .Build()
+ .Done().Ptr();
+}
+
+TExprNode::TPtr ExpandJsonQuery(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) {
+ /*
+ Here we rewrite expression
+ JSON_QUERY(
+ <json expr>,
+ <jsonpath>
+ [PASSING <variableExpr1> AS <variableName1>, ...]
+ [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER]
+ [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY]
+ [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR]
+ )
+ into something like
+ Json2::SqlQuery...(
+ Json2::Parse(<json expr>),
+ Json2::CompilePath(<jsonpath>),
+ <dict with variables>,
+ <do we have ERROR ON EMPTY?>,
+ <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>,
+ <do we have ERROR ON ERROR?>,
+ <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR>
+ )
+ Exact UDF name is chosen depending on the wrap config:
+ - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery
+ - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap
+ - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap
+ */
+ TCoJsonQuery jsonQuery(node);
+
+ // <json expr> or Json2::Parse(<json expr>)
+ EDataSlot jsonDataSlot;
+ TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot);
+
+ // Json2::CompilePath(<jsonPath>)
+ TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx);
+
+ // Json2::SqlQuery...(<json expr>, <jsonpath>, ...)
+ const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content());
+ TString sqlQueryUdfName = "SqlQuery";
+ switch (wrapMode) {
+ case EJsonQueryWrap::NoWrap:
+ sqlQueryUdfName = "SqlQuery";
+ break;
+ case EJsonQueryWrap::Wrap:
+ sqlQueryUdfName = "SqlQueryWrap";
+ break;
+ case EJsonQueryWrap::ConditionalWrap:
+ sqlQueryUdfName = "SqlQueryConditionalWrap";
+ break;
+ }
+
+ const TTypeAnnotationNode* inputType = nullptr;
+ if (jsonDataSlot == EDataSlot::JsonDocument) {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument));
+ sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName;
+ } else {
+ inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
+ }
+ inputType = ctx.MakeType<TOptionalExprType>(inputType);
+ sqlQueryUdfName = "Json2." + sqlQueryUdfName;
+
+ const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode"));
+ TTypeAnnotationNode::TListType arguments{
+ inputType,
+ ctx.MakeType<TResourceExprType>("JsonPath"),
+ ctx.MakeType<TDataExprType>(EDataSlot::Bool),
+ optionalJsonResourceType,
+ ctx.MakeType<TDataExprType>(EDataSlot::Bool),
+ optionalJsonResourceType,
+ };
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TTupleExprType>(arguments),
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) {
+ return Build<TCoBool>(ctx, pos)
+ .Literal()
+ .Build(handler == EJsonQueryHandler::Error ? "true" : "false")
+ .Done().Ptr();
+ };
+
+ auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) {
+ switch (handler) {
+ case EJsonQueryHandler::Error:
+ case EJsonQueryHandler::Null: {
+ // Nothing(Optional<Resource<JsonNode>>)
+ return Build<TCoNothing>(ctx, pos)
+ .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx))
+ .Done().Ptr();
+ }
+ case EJsonQueryHandler::EmptyArray: {
+ auto value = Build<TCoJson>(ctx, pos)
+ .Literal()
+ .Build("[]")
+ .Done().Ptr();
+ return BuildJsonParse(value, ctx);
+ }
+ case EJsonQueryHandler::EmptyObject: {
+ auto value = Build<TCoJson>(ctx, pos)
+ .Literal()
+ .Build("{}")
+ .Done().Ptr();
+ return BuildJsonParse(value, ctx);
+ }
+ }
+ };
+
+ const auto jsonQueryPos = jsonQuery.Pos();
+ auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos)
+ .MethodName()
+ .Build(sqlQueryUdfName)
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content());
+ const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content());
+ const auto onEmptyPos = jsonQuery.OnEmpty().Pos();
+ const auto onErrorPos = jsonQuery.OnError().Pos();
+
+ auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos)
+ .Callable(sqlQuery)
+ .FreeArgs()
+ .Add(parseJsonExpr)
+ .Add(compilePathExpr)
+ .Add(jsonQuery.Variables())
+ .Add(buildShouldThrow(onEmpty, onEmptyPos))
+ .Add(buildHandler(onEmpty, onEmptyPos))
+ .Add(buildShouldThrow(onError, onErrorPos))
+ .Add(buildHandler(onError, onErrorPos))
+ .Build()
+ .Done().Ptr();
+
+ // In this case we need to serialize Resource<JsonNode> to Json type
+ if (!typesCtx.JsonQueryReturnsJsonDocument) {
+ return BuildJsonSerialize(sqlQueryApply, ctx);
+ }
+
+ // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument
+ {
+ auto resourcePos = sqlQueryApply->Pos();
+
+ auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")),
+ });
+
+ auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
+ argumentsType,
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}),
+ ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{})
+ });
+
+ TStringBuf serializeUdfName = "Json2.Serialize";
+ if (typesCtx.JsonQueryReturnsJsonDocument) {
+ serializeUdfName = "Json2.SerializeToJsonDocument";
+ }
+ auto parse = Build<TCoUdf>(ctx, resourcePos)
+ .MethodName()
+ .Build(serializeUdfName)
+ .RunConfigValue<TCoVoid>()
+ .Build()
+ .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx))
+ .Done().Ptr();
+
+ return Build<TCoApply>(ctx, resourcePos)
+ .Callable(parse)
+ .FreeArgs()
+ .Add(sqlQueryApply)
+ .Build()
+ .Done().Ptr();
+ }
+};
+
+} // namespace NYql \ No newline at end of file
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h
new file mode 100644
index 00000000000..82af2f2ecc6
--- /dev/null
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+namespace NYql {
+ TExprNode::TPtr ExpandJsonValue(const TExprNode::TPtr& node, TExprContext& ctx);
+ TExprNode::TPtr ExpandJsonExists(const TExprNode::TPtr& node, TExprContext& ctx);
+ TExprNode::TPtr ExpandJsonQuery(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx);
+} // namespace NYql \ No newline at end of file
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
index c35056e47ca..d16c1d46ef6 100644
--- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -1,5 +1,7 @@
#include "yql_opt_peephole_physical.h"
+#include <ydb/library/yql/core/peephole_opt/yql_opt_json_peephole_physical.h>
+#include <ydb/library/yql/core/yql_atom_enums.h>
#include <ydb/library/yql/core/yql_expr_optimize.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
#include <ydb/library/yql/core/yql_expr_constraint.h>
@@ -7158,6 +7160,8 @@ struct TPeepHoleRules {
{"CheckedDiv", &ExpandCheckedDiv},
{"CheckedMod", &ExpandCheckedMod},
{"CheckedMinus", &ExpandCheckedMinus},
+ {"JsonValue", &ExpandJsonValue},
+ {"JsonExists", &ExpandJsonExists}
};
static constexpr std::initializer_list<TExtPeepHoleOptimizerMap::value_type> CommonStageExtRulesInit = {
@@ -7168,6 +7172,7 @@ struct TPeepHoleRules {
{"AggregateMergeFinalize", &ExpandAggregatePeephole},
{"AggregateMergeManyFinalize", &ExpandAggregatePeephole},
{"AggregateFinalize", &ExpandAggregatePeephole},
+ {"JsonQuery", &ExpandJsonQuery},
};
static constexpr std::initializer_list<TPeepHoleOptimizerMap::value_type> SimplifyStageRulesInit = {
diff --git a/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt
index e97d638f7b4..b73b9d81828 100644
--- a/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/services/ydb/ut/CMakeLists.darwin-x86_64.txt
@@ -64,8 +64,6 @@ target_sources(ydb-services-ydb-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp
)
set_property(
diff --git a/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt b/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt
index d0b405d213e..017cd291baa 100644
--- a/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/services/ydb/ut/CMakeLists.linux-aarch64.txt
@@ -67,8 +67,6 @@ target_sources(ydb-services-ydb-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp
)
set_property(
diff --git a/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt
index f452cdf9321..77c88076457 100644
--- a/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/services/ydb/ut/CMakeLists.linux-x86_64.txt
@@ -68,8 +68,6 @@ target_sources(ydb-services-ydb-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp
)
set_property(
diff --git a/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt b/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt
index 05dffba80cc..64ee11bf384 100644
--- a/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/services/ydb/ut/CMakeLists.windows-x86_64.txt
@@ -57,8 +57,6 @@ target_sources(ydb-services-ydb-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_logstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_olapstore_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/ydb_monitoring_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/json_udf.cpp
- ${CMAKE_SOURCE_DIR}/ydb/services/ydb/ut/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/services/ydb/cert_gen.cpp
)
set_property(
diff --git a/ydb/services/ydb/ut/re2_udf.cpp b/ydb/services/ydb/ut/re2_udf.cpp
deleted file mode 100644
index e7e4cecd3ad..00000000000
--- a/ydb/services/ydb/ut/re2_udf.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// HACK: the TRe2Module class is in an anonymous namespace
-// so including the source cpp is the only way to access it
-#include <ydb/library/yql/udfs/common/re2/re2_udf.cpp>
-
-NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module() {
- return new TRe2Module<true>();
-}
diff --git a/ydb/services/ydb/ut/udfs.h b/ydb/services/ydb/ut/udfs.h
deleted file mode 100644
index 612ab37da10..00000000000
--- a/ydb/services/ydb/ut/udfs.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-#include <ydb/library/yql/public/udf/udf_registrator.h>
-
-NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module();
-NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateJson2Module();
diff --git a/ydb/services/ydb/ydb_olapstore_ut.cpp b/ydb/services/ydb/ydb_olapstore_ut.cpp
index 2fdced9f4a0..402b150b52c 100644
--- a/ydb/services/ydb/ydb_olapstore_ut.cpp
+++ b/ydb/services/ydb/ydb_olapstore_ut.cpp
@@ -1,6 +1,5 @@
#include "ydb_common_ut.h"
-#include <ydb/services/ydb/ut/udfs.h>
#include <ydb/core/kqp/ut/common/kqp_ut_common.h>
#include <ydb/public/sdk/cpp/client/ydb_result/result.h>
@@ -44,14 +43,6 @@ static constexpr const char* testShardingVariants[] = {
Y_UNIT_TEST_SUITE(YdbOlapStore) {
- NMiniKQL::IFunctionRegistry* UdfFrFactory(const NKikimr::NScheme::TTypeRegistry& typeRegistry) {
- Y_UNUSED(typeRegistry);
- auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone();
- funcRegistry->AddModule("fake_re2_path", "Re2", CreateRe2Module());
- funcRegistry->AddModule("fake_json2_path", "Json2", CreateJson2Module());
- return funcRegistry.Release();
- }
-
void EnableDebugLogs(TKikimrWithGrpcAndRootSchema& server) {
server.Server_->GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_DEBUG);
server.Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG);
@@ -483,7 +474,7 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) {
template<bool NotNull>
void TestQuery(const TString& query, const TString& sharding) {
NKikimrConfig::TAppConfig appConfig;
- TKikimrWithGrpcAndRootSchema server(appConfig, {}, {}, false, &UdfFrFactory);
+ TKikimrWithGrpcAndRootSchema server(appConfig, {}, {}, false, nullptr);
auto connection = ConnectToServer(server);