diff options
author | laplab <laplab@yandex-team.ru> | 2022-02-10 16:47:56 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:56 +0300 |
commit | f102186b7df1a2a26c35c81eeed5ae914484bdab (patch) | |
tree | a9c6a260744c49fe2a01ab27da07d8c3d3baa3a4 | |
parent | dbca3ecc91dd33d6527ec66a8b06dd6fcb2a3a3e (diff) | |
download | ydb-f102186b7df1a2a26c35c81eeed5ae914484bdab.tar.gz |
Restoring authorship annotation for <laplab@yandex-team.ru>. Commit 1 of 2.
188 files changed, 16957 insertions, 16957 deletions
diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl index 27f00bdda8..a9fe6ca9a8 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl @@ -313,12 +313,12 @@ void ANTLR_Exception<ImplTraits, Ex, StreamType>::displayRecognitionError( ANTLR // parse? // count = 0; - size = 0; - if (BaseType::m_expectingSet != NULL) { - std::unique_ptr<BitsetType> errBits(BaseType::m_expectingSet->bitsetLoad()); - numbits = errBits->numBits(); - size = errBits->size(); - } + size = 0; + if (BaseType::m_expectingSet != NULL) { + std::unique_ptr<BitsetType> errBits(BaseType::m_expectingSet->bitsetLoad()); + numbits = errBits->numBits(); + size = errBits->size(); + } if (size > 0) { diff --git a/library/cpp/yson/detail.h b/library/cpp/yson/detail.h index 27f5e8ffff..b585e5dcd8 100644 --- a/library/cpp/yson/detail.h +++ b/library/cpp/yson/detail.h @@ -380,9 +380,9 @@ namespace NYson { void CheckMemoryLimit() { if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { - ythrow TYsonException() - << "Memory limit exceeded while parsing YSON stream: allocated " - << Buffer_.capacity() << ", limit " << (*MemoryLimit_); + ythrow TYsonException() + << "Memory limit exceeded while parsing YSON stream: allocated " + << Buffer_.capacity() << ", limit " << (*MemoryLimit_); } } @@ -411,7 +411,7 @@ namespace NYson { Buffer_.push_back(ch); result = ENumericResult::Uint64; } else if (isalpha(ch)) { - ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal"; + ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal"; } else { break; } @@ -531,7 +531,7 @@ namespace NYson { i32 length = ZigZagDecode32(ulength); if (length < 0) { - ythrow TYsonException() << "Negative binary string literal length " << length; + ythrow TYsonException() << "Negative binary string literal length " << length; } if 
(TBaseStream::Begin() + length <= TBaseStream::End()) { @@ -564,7 +564,7 @@ namespace NYson { static TStringBuf falseString = "false"; auto throwIncorrectBoolean = [&]() { - ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size()); + ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size()); }; Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); @@ -637,7 +637,7 @@ namespace NYson { void SkipCharToken(char symbol) { char ch = SkipSpaceAndGetChar(); if (ch != symbol) { - ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'"; + ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'"; } TBaseStream::Advance(1); diff --git a/library/cpp/yson/lexer_detail.h b/library/cpp/yson/lexer_detail.h index 0bba30acdd..de2546903e 100644 --- a/library/cpp/yson/lexer_detail.h +++ b/library/cpp/yson/lexer_detail.h @@ -196,7 +196,7 @@ namespace NYson { } } else { // None Y_ASSERT(state == EReadStartCase::None); - ythrow TYsonException() << "Unexpected " << ch1; + ythrow TYsonException() << "Unexpected " << ch1; } } else { // BinaryScalar = x01b TBase::Advance(1); @@ -243,19 +243,19 @@ namespace NYson { try { *token = TToken(FromString<double>(valueBuffer)); } catch (yexception&) { - ythrow TYsonException() << "Error parsing double literal " << valueBuffer; + ythrow TYsonException() << "Error parsing double literal " << valueBuffer; } } else if (numericResult == ENumericResult::Int64) { try { *token = TToken(FromString<i64>(valueBuffer)); } catch (yexception&) { - ythrow TYsonException() << "Error parsing int64 literal " << valueBuffer; + ythrow TYsonException() << "Error parsing int64 literal " << valueBuffer; } } else if (numericResult == ENumericResult::Uint64) { try { *token = TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); } catch (yexception&) { - ythrow TYsonException() << "Error parsing uint64 literal " << 
valueBuffer; + ythrow TYsonException() << "Error parsing uint64 literal " << valueBuffer; } } } diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h index 44223caf12..ba44c92e7e 100644 --- a/library/cpp/yson/parser_detail.h +++ b/library/cpp/yson/parser_detail.h @@ -40,7 +40,7 @@ namespace NYson { while (!(TBase::IsFinished() && TBase::IsEmpty())) { if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { - ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found"; + ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found"; } else if (!TBase::IsEmpty()) { TBase::Advance(1); } @@ -163,7 +163,7 @@ namespace NYson { Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); } } else { - ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node"; + ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node"; } } } @@ -195,7 +195,7 @@ namespace NYson { TBase::ReadUnquotedString(&value); Consumer->OnKeyedItem(value); } else { - ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key"; + ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key"; } } } @@ -210,7 +210,7 @@ namespace NYson { if (ch == KeyValueSeparatorSymbol) { TBase::Advance(1); } else { - ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found"; + ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found"; } ParseNode<AllowFinish>(); ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); @@ -218,8 +218,8 @@ namespace NYson { TBase::Advance(1); ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); } else if (ch != endSymbol) { - ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol - << "' or '" << endSymbol << "' but '" << ch << "' found"; + ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol + << "' or '" << endSymbol << "' but '" << ch << "' found"; } } } @@ 
-246,8 +246,8 @@ namespace NYson { TBase::Advance(1); return true; } else if (ch != endSymbol) { - ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol - << "' or '" << endSymbol << "' but '" << ch << "' found"; + ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol + << "' or '" << endSymbol << "' but '" << ch << "' found"; } return false; } @@ -273,7 +273,7 @@ namespace NYson { value = FromString<double>(valueBuffer); } catch (yexception& e) { // This exception is wrapped in parser. - ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e; + ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e; } Consumer->OnDoubleScalar(value); } else if (numericResult == ENumericResult::Int64) { @@ -282,7 +282,7 @@ namespace NYson { value = FromString<i64>(valueBuffer); } catch (yexception& e) { // This exception is wrapped in parser. - ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e; + ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e; } Consumer->OnInt64Scalar(value); } else if (numericResult == ENumericResult::Uint64) { @@ -291,7 +291,7 @@ namespace NYson { value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); } catch (yexception& e) { // This exception is wrapped in parser. 
- ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e; + ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e; } Consumer->OnUint64Scalar(value); } diff --git a/library/cpp/yson/token.cpp b/library/cpp/yson/token.cpp index c8584c8c2e..7778ac887f 100644 --- a/library/cpp/yson/token.cpp +++ b/library/cpp/yson/token.cpp @@ -188,11 +188,11 @@ namespace NYson { void TToken::CheckType(ETokenType expectedType) const { if (Type_ != expectedType) { if (Type_ == ETokenType::EndOfStream) { - ythrow TYsonException() << "Unexpected end of stream (ExpectedType: " << TokenTypeToString(expectedType) << ")"; + ythrow TYsonException() << "Unexpected end of stream (ExpectedType: " << TokenTypeToString(expectedType) << ")"; } else { - ythrow TYsonException() << "Unexpected token (Token: '" << ToString(*this) - << "', Type: " << TokenTypeToString(Type_) - << ", ExpectedType: " << TokenTypeToString(expectedType) << ")"; + ythrow TYsonException() << "Unexpected token (Token: '" << ToString(*this) + << "', Type: " << TokenTypeToString(Type_) + << ", ExpectedType: " << TokenTypeToString(expectedType) << ")"; } } } diff --git a/ydb/core/client/server/msgbus_server_db.cpp b/ydb/core/client/server/msgbus_server_db.cpp index 08f3d71786..7fc77f68f0 100644 --- a/ydb/core/client/server/msgbus_server_db.cpp +++ b/ydb/core/client/server/msgbus_server_db.cpp @@ -370,10 +370,10 @@ public: return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::Yson>(jsonValue.GetString()); case NScheme::NTypeIds::Json: return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::Json>(jsonValue.GetString()); - case NScheme::NTypeIds::JsonDocument: - return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::JsonDocument>(jsonValue.GetString()); - case NScheme::NTypeIds::DyNumber: - return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::DyNumber>(jsonValue.GetString()); + case NScheme::NTypeIds::JsonDocument: + return 
pgmBuilder.NewDataLiteral<NUdf::EDataSlot::JsonDocument>(jsonValue.GetString()); + case NScheme::NTypeIds::DyNumber: + return pgmBuilder.NewDataLiteral<NUdf::EDataSlot::DyNumber>(jsonValue.GetString()); default: // still better than VERIFY return pgmBuilder.NewEmptyOptionalDataLiteral(typeId); diff --git a/ydb/core/engine/mkql_proto.cpp b/ydb/core/engine/mkql_proto.cpp index 3dd053e372..02ff37e7a7 100644 --- a/ydb/core/engine/mkql_proto.cpp +++ b/ydb/core/engine/mkql_proto.cpp @@ -51,10 +51,10 @@ namespace { return NUdf::TUnboxedValuePod(value.uint64_value()); case NUdf::TDataType<NUdf::TInterval>::Id: return NUdf::TUnboxedValuePod(value.int64_value()); - case NUdf::TDataType<NUdf::TJsonDocument>::Id: - return ValueFromString(NUdf::EDataSlot::JsonDocument, value.text_value()); - case NUdf::TDataType<NUdf::TDyNumber>::Id: - return ValueFromString(NUdf::EDataSlot::DyNumber, value.text_value()); + case NUdf::TDataType<NUdf::TJsonDocument>::Id: + return ValueFromString(NUdf::EDataSlot::JsonDocument, value.text_value()); + case NUdf::TDataType<NUdf::TDyNumber>::Id: + return ValueFromString(NUdf::EDataSlot::DyNumber, value.text_value()); default: return MakeString(value.bytes_value()); } @@ -302,12 +302,12 @@ bool CellsFromTuple(const NKikimrMiniKQL::TType* tupleType, c = TCell(v.GetText().data(), v.GetText().size()); break; } - case NScheme::NTypeIds::JsonDocument: - case NScheme::NTypeIds::DyNumber: - { - c = TCell(v.GetBytes().data(), v.GetBytes().size()); - break; - } + case NScheme::NTypeIds::JsonDocument: + case NScheme::NTypeIds::DyNumber: + { + c = TCell(v.GetBytes().data(), v.GetBytes().size()); + break; + } case NScheme::NTypeIds::String: { if (v.HasBytes()) { @@ -407,9 +407,9 @@ bool CellToValue(NScheme::TTypeId typeId, const TCell& c, NKikimrMiniKQL::TValue val.MutableOptional()->SetInt64(ReadUnaligned<i64>(c.Data())); break; - case NScheme::NTypeIds::JsonDocument: + case NScheme::NTypeIds::JsonDocument: case NScheme::NTypeIds::String: - case 
NScheme::NTypeIds::DyNumber: + case NScheme::NTypeIds::DyNumber: val.MutableOptional()->SetBytes(c.Data(), c.Size()); break; diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index 127347e067..8602525bf2 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -12,7 +12,7 @@ #include <ydb/library/binary_json/write.h> #include <ydb/library/dynumber/dynumber.h> - + #include <util/string/vector.h> #include <util/generic/size_literals.h> @@ -225,26 +225,26 @@ private: c = TCell(v.data(), v.size()); break; } - case NScheme::NTypeIds::JsonDocument : { - const auto binaryJson = NBinaryJson::SerializeToBinaryJson(val.Gettext_value()); - if (!binaryJson.Defined()) { - err = "Invalid JSON for JsonDocument provided"; - return false; - } - const auto binaryJsonInPool = valueDataPool.AppendString(TStringBuf(binaryJson->Data(), binaryJson->Size())); - c = TCell(binaryJsonInPool.data(), binaryJsonInPool.size()); - break; - } - case NScheme::NTypeIds::DyNumber : { - const auto dyNumber = NDyNumber::ParseDyNumberString(val.Gettext_value()); - if (!dyNumber.Defined()) { - err = "Invalid DyNumber string representation"; - return false; - } - const auto dyNumberInPool = valueDataPool.AppendString(TStringBuf(*dyNumber)); - c = TCell(dyNumberInPool.data(), dyNumberInPool.size()); - break; - } + case NScheme::NTypeIds::JsonDocument : { + const auto binaryJson = NBinaryJson::SerializeToBinaryJson(val.Gettext_value()); + if (!binaryJson.Defined()) { + err = "Invalid JSON for JsonDocument provided"; + return false; + } + const auto binaryJsonInPool = valueDataPool.AppendString(TStringBuf(binaryJson->Data(), binaryJson->Size())); + c = TCell(binaryJsonInPool.data(), binaryJsonInPool.size()); + break; + } + case NScheme::NTypeIds::DyNumber : { + const auto dyNumber = NDyNumber::ParseDyNumberString(val.Gettext_value()); + if (!dyNumber.Defined()) { + err = "Invalid DyNumber string representation"; + 
return false; + } + const auto dyNumberInPool = valueDataPool.AppendString(TStringBuf(*dyNumber)); + c = TCell(dyNumberInPool.data(), dyNumberInPool.size()); + break; + } case NScheme::NTypeIds::Yson : case NScheme::NTypeIds::String : { TString v = val.Getbytes_value(); diff --git a/ydb/core/kqp/compile/kqp_compile.cpp b/ydb/core/kqp/compile/kqp_compile.cpp index c7d305ab46..4b82ccc813 100644 --- a/ydb/core/kqp/compile/kqp_compile.cpp +++ b/ydb/core/kqp/compile/kqp_compile.cpp @@ -196,7 +196,7 @@ void FillReadRanges(const TReader& read, const TKikimrTableMetadata& tableMeta, readProto.SetReverse(settings.Reverse); } -template <typename TEffectCallable, typename TEffectProto> +template <typename TEffectCallable, typename TEffectProto> void FillEffectRows(const TEffectCallable& callable, TEffectProto& proto, bool inplace) { if (auto maybeList = callable.Input().template Maybe<TCoIterator>().List()) { if (auto maybeParam = maybeList.Cast().template Maybe<TCoParameter>()) { @@ -491,15 +491,15 @@ private: FillTable(upsertRows.Table(), *tableOp.MutableTable()); FillColumns(upsertRows.Columns(), *tableMeta, tableOp, false); FillEffectRows(upsertRows, *tableOp.MutableUpsertRows(), settings.Inplace); - } else if (auto maybeDeleteRows = node.Maybe<TKqpDeleteRows>()) { - auto deleteRows = maybeDeleteRows.Cast(); - auto tableMeta = TablesData->ExistingTable(Cluster, deleteRows.Table().Path()).Metadata; - YQL_ENSURE(tableMeta); - - YQL_ENSURE(stageProto.GetIsEffectsStage()); - - auto& tableOp = *stageProto.AddTableOps(); - FillTable(deleteRows.Table(), *tableOp.MutableTable()); + } else if (auto maybeDeleteRows = node.Maybe<TKqpDeleteRows>()) { + auto deleteRows = maybeDeleteRows.Cast(); + auto tableMeta = TablesData->ExistingTable(Cluster, deleteRows.Table().Path()).Metadata; + YQL_ENSURE(tableMeta); + + YQL_ENSURE(stageProto.GetIsEffectsStage()); + + auto& tableOp = *stageProto.AddTableOps(); + FillTable(deleteRows.Table(), *tableOp.MutableTable()); FillEffectRows(deleteRows, 
*tableOp.MutableDeleteRows(), false); } else if (auto maybeWideReadTableRanges = node.Maybe<TKqpWideReadTableRanges>()) { auto readTableRanges = maybeWideReadTableRanges.Cast(); diff --git a/ydb/core/kqp/compile/kqp_mkql_compiler.cpp b/ydb/core/kqp/compile/kqp_mkql_compiler.cpp index 0413044698..533d62265d 100644 --- a/ydb/core/kqp/compile/kqp_mkql_compiler.cpp +++ b/ydb/core/kqp/compile/kqp_mkql_compiler.cpp @@ -320,21 +320,21 @@ TIntrusivePtr<IMkqlCallableCompiler> CreateKqlCompiler(const TKqlCompileContext& return result; }); - compiler->AddCallable(TKqpDeleteRows::CallableName(), - [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { - TKqpDeleteRows deleteRows(&node); - + compiler->AddCallable(TKqpDeleteRows::CallableName(), + [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { + TKqpDeleteRows deleteRows(&node); + const auto& tableMeta = ctx.GetTableMeta(deleteRows.Table()); auto rowsType = deleteRows.Input().Ref().GetTypeAnn()->Cast<TStreamExprType>(); ValidateColumnsType(rowsType, tableMeta); - const auto tableId = MakeTableId(deleteRows.Table()); - const auto rows = MkqlBuildExpr(deleteRows.Input().Ref(), buildCtx); - - return ctx.PgmBuilder().KqpDeleteRows(tableId, rows); - }); - + const auto tableId = MakeTableId(deleteRows.Table()); + const auto rows = MkqlBuildExpr(deleteRows.Input().Ref(), buildCtx); + + return ctx.PgmBuilder().KqpDeleteRows(tableId, rows); + }); + compiler->AddCallable(TKqpEffects::CallableName(), [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { std::vector<TRuntimeNode> args; diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 2faa33d5e1..cd849ed4da 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -334,29 +334,29 @@ {"Index": 0, "Name": "Lambda", "Type": "TCoLambda"}, {"Index": 1, "Name": "ArgsType", "Type": "TExprBase"} ] - }, - { - "Name": "TKqlDeleteRowsBase", - "Base": 
"TKqlTableEffect", - "Match": {"Type": "CallableBase"}, - "Children": [ - {"Index": 1, "Name": "Input", "Type": "TExprBase"} - ] - }, - { - "Name": "TKqlDeleteRows", - "Base": "TKqlDeleteRowsBase", - "Match": {"Type": "Callable", "Name": "KqlDeleteRows"} - }, - { + }, + { + "Name": "TKqlDeleteRowsBase", + "Base": "TKqlTableEffect", + "Match": {"Type": "CallableBase"}, + "Children": [ + {"Index": 1, "Name": "Input", "Type": "TExprBase"} + ] + }, + { + "Name": "TKqlDeleteRows", + "Base": "TKqlDeleteRowsBase", + "Match": {"Type": "Callable", "Name": "KqlDeleteRows"} + }, + { "Name": "TKqlDeleteRowsIndex", "Base": "TKqlDeleteRowsBase", "Match": {"Type": "Callable", "Name": "KqlDeleteRowsIndex"} }, { - "Name": "TKqpDeleteRows", - "Base": "TKqlDeleteRowsBase", - "Match": {"Type": "Callable", "Name": "KqpDeleteRows"} + "Name": "TKqpDeleteRows", + "Base": "TKqlDeleteRowsBase", + "Match": {"Type": "Callable", "Name": "KqpDeleteRows"} }, { "Name": "TKqpOlapOperationBase", diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 44af6d1168..ee8aa19d07 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -1052,7 +1052,7 @@ public: TypesCtx->RandomProvider = TAppData::RandomProvider; TypesCtx->Modules = ModuleResolver; TypesCtx->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, TUserDataTable(), nullptr, nullptr); - TypesCtx->JsonQueryReturnsJsonDocument = true; + TypesCtx->JsonQueryReturnsJsonDocument = true; // Result provider auto writerFactory = [] () { return MakeIntrusive<TKqpResultWriter>(); }; diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index f5f2401ce9..5bdc896ed7 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -147,37 +147,37 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const return true; } -bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const 
TKqpOptimizeContext& kqpCtx, - const TCoArgument& inputArg, TMaybeNode<TExprBase>& stageInput, TMaybeNode<TExprBase>& effect) -{ - if (IsDqPureExpr(node.Input())) { +bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, + const TCoArgument& inputArg, TMaybeNode<TExprBase>& stageInput, TMaybeNode<TExprBase>& effect) +{ + if (IsDqPureExpr(node.Input())) { stageInput = BuildPrecomputeStage(node.Input(), ctx); - effect = Build<TKqpDeleteRows>(ctx, node.Pos()) - .Table(node.Table()) + effect = Build<TKqpDeleteRows>(ctx, node.Pos()) + .Table(node.Table()) .Input<TCoIterator>() .List(inputArg) .Build() - .Done(); - return true; - } - - if (!EnsureDqUnion(node.Input(), ctx)) { - return false; - } - - auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, node.Table().Path()); - - auto dqUnion = node.Input().Cast<TDqCnUnionAll>(); - auto input = dqUnion.Output().Stage().Program().Body(); - + .Done(); + return true; + } + + if (!EnsureDqUnion(node.Input(), ctx)) { + return false; + } + + auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, node.Table().Path()); + + auto dqUnion = node.Input().Cast<TDqCnUnionAll>(); + auto input = dqUnion.Output().Stage().Program().Body(); + if (InplaceUpdateEnabled(*kqpCtx.Config) && IsMapWrite(table, input)) { - stageInput = Build<TKqpCnMapShard>(ctx, node.Pos()) - .Output() - .Stage(dqUnion.Output().Stage()) - .Index(dqUnion.Output().Index()) - .Build() - .Done(); + stageInput = Build<TKqpCnMapShard>(ctx, node.Pos()) + .Output() + .Stage(dqUnion.Output().Stage()) + .Index(dqUnion.Output().Index()) + .Build() + .Done(); effect = Build<TKqpDeleteRows>(ctx, node.Pos()) .Table(node.Table()) @@ -185,10 +185,10 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const .Input(inputArg) .Build() .Done(); - } else { + } else { stageInput = Build<TDqPhyPrecompute>(ctx, node.Pos()) - .Connection(dqUnion) - .Done(); + .Connection(dqUnion) + .Done(); effect = 
Build<TKqpDeleteRows>(ctx, node.Pos()) .Table(node.Table()) @@ -196,11 +196,11 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const .List(inputArg) .Build() .Done(); - } - - return true; -} - + } + + return true; +} + bool BuildEffects(TPositionHandle pos, const TVector<TKqlTableEffect>& effects, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, TVector<TExprBase>& builtEffects) { @@ -223,12 +223,12 @@ bool BuildEffects(TPositionHandle pos, const TVector<TKqlTableEffect>& effects, } } - if (auto maybeDeleteRows = effect.Maybe<TKqlDeleteRows>()) { - if (!BuildDeleteRowsEffect(maybeDeleteRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect)) { - return false; - } - } - + if (auto maybeDeleteRows = effect.Maybe<TKqlDeleteRows>()) { + if (!BuildDeleteRowsEffect(maybeDeleteRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect)) { + return false; + } + } + YQL_ENSURE(newEffect); newEffects.push_back(newEffect.Cast()); diff --git a/ydb/core/kqp/opt/kqp_opt_kql.cpp b/ydb/core/kqp/opt/kqp_opt_kql.cpp index 1eeb95daee..5e216f848b 100644 --- a/ydb/core/kqp/opt/kqp_opt_kql.cpp +++ b/ydb/core/kqp/opt/kqp_opt_kql.cpp @@ -221,15 +221,15 @@ TExprBase BuildUpdateOnTableWithIndex(const TKiWriteTable& write, const TCoAtomL .Done(); } -TExprBase BuildDeleteTable(const TKiWriteTable& write, const TKikimrTableDescription& tableData, TExprContext& ctx) { - const auto keysToDelete = ProjectColumns(write.Input(), tableData.Metadata->KeyColumnNames, ctx); - - return Build<TKqlDeleteRows>(ctx, write.Pos()) - .Table(BuildTableMeta(tableData, write.Pos(), ctx)) - .Input(keysToDelete) - .Done(); -} - +TExprBase BuildDeleteTable(const TKiWriteTable& write, const TKikimrTableDescription& tableData, TExprContext& ctx) { + const auto keysToDelete = ProjectColumns(write.Input(), tableData.Metadata->KeyColumnNames, ctx); + + return Build<TKqlDeleteRows>(ctx, write.Pos()) + .Table(BuildTableMeta(tableData, write.Pos(), ctx)) + .Input(keysToDelete) + .Done(); +} + 
TExprBase BuildDeleteTableWithIndex(const TKiWriteTable& write, const TKikimrTableDescription& tableData, TExprContext& ctx) { const auto keysToDelete = ProjectColumns(write.Input(), tableData.Metadata->KeyColumnNames, ctx); @@ -244,38 +244,38 @@ TExprBase BuildRowsToDelete(const TKikimrTableDescription& tableData, bool withS { const auto tableMeta = BuildTableMeta(tableData, pos, ctx); const auto tableColumns = BuildColumnsList(tableData, pos, ctx, withSystemColumns); - + const auto allRows = Build<TKqlReadTable>(ctx, pos) - .Table(tableMeta) - .Range() - .From<TKqlKeyInc>() - .Build() - .To<TKqlKeyInc>() - .Build() - .Build() + .Table(tableMeta) + .Range() + .From<TKqlKeyInc>() + .Build() + .To<TKqlKeyInc>() + .Build() + .Build() .Columns(tableColumns) .Settings() .Build() - .Done(); - + .Done(); + return Build<TCoFilter>(ctx, pos) - .Input(allRows) + .Input(allRows) .Lambda(filter) - .Done(); + .Done(); } - + TExprBase BuildDeleteTable(const TKiDeleteTable& del, const TKikimrTableDescription& tableData, bool withSystemColumns, TExprContext& ctx) { auto rowsToDelete = BuildRowsToDelete(tableData, withSystemColumns, del.Filter(), del.Pos(), ctx); auto keysToDelete = ProjectColumns(rowsToDelete, tableData.Metadata->KeyColumnNames, ctx); - + return Build<TKqlDeleteRows>(ctx, del.Pos()) .Table(BuildTableMeta(tableData, del.Pos(), ctx)) - .Input(keysToDelete) - .Done(); -} - + .Input(keysToDelete) + .Done(); +} + TVector<TExprBase> BuildDeleteTableWithIndex(const TKiDeleteTable& del, const TKikimrTableDescription& tableData, bool withSystemColumns, TExprContext& ctx) { @@ -576,9 +576,9 @@ TExprBase WriteTableSimple(const TKiWriteTable& write, const TCoAtomList& inputC case TYdbOperation::UpdateOn: return BuildUpdateOnTable(write, inputColumns, tableData, ctx); case TYdbOperation::Delete: - return BuildDeleteTable(write, tableData, ctx); + return BuildDeleteTable(write, tableData, ctx); case TYdbOperation::DeleteOn: - return BuildDeleteTable(write, tableData, ctx); + 
return BuildDeleteTable(write, tableData, ctx); default: YQL_ENSURE(false, "Unsupported table operation: " << op << ", table: " << tableData.Metadata->Name); } @@ -636,7 +636,7 @@ TVector<TExprBase> HandleUpdateTable(const TKiUpdateTable& update, TExprContext& TVector<TExprBase> HandleDeleteTable(const TKiDeleteTable& del, TExprContext& ctx, const TKikimrTablesData& tablesData, bool withSystemColumns) { - auto& tableData = GetTableData(tablesData, del.DataSink().Cluster(), del.Table().Value()); + auto& tableData = GetTableData(tablesData, del.DataSink().Cluster(), del.Table().Value()); if (HasIndexesToWrite(tableData)) { return BuildDeleteTableWithIndex(del, tableData, withSystemColumns, ctx); } else { diff --git a/ydb/core/kqp/prepare/kqp_type_ann.cpp b/ydb/core/kqp/prepare/kqp_type_ann.cpp index ee51c7349c..d405e5fd2b 100644 --- a/ydb/core/kqp/prepare/kqp_type_ann.cpp +++ b/ydb/core/kqp/prepare/kqp_type_ann.cpp @@ -638,64 +638,64 @@ TStatus AnnotateUpdateRows(const TExprNode::TPtr& node, TExprContext& ctx, const } TStatus AnnotateDeleteRows(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, - const TKikimrTablesData& tablesData) -{ + const TKikimrTablesData& tablesData) +{ if (!EnsureArgsCount(*node, 2, ctx)) { - return TStatus::Error; - } - + return TStatus::Error; + } + auto table = ResolveTable(node->Child(TKqlDeleteRowsBase::idx_Table), ctx, cluster, tablesData); if (!table.second) { return TStatus::Error; } - const TTypeAnnotationNode* itemType = nullptr; - bool isStream = false; - + const TTypeAnnotationNode* itemType = nullptr; + bool isStream = false; + auto* input = node->Child(TKqlDeleteRowsBase::idx_Input); if (TKqpDeleteRows::Match(node.Get())) { if (!EnsureStreamType(*input, ctx)) { - return TStatus::Error; - } + return TStatus::Error; + } itemType = input->GetTypeAnn()->Cast<TStreamExprType>()->GetItemType(); - isStream = true; - } else { + isStream = true; + } else { YQL_ENSURE(TKqlDeleteRows::Match(node.Get()) || 
TKqlDeleteRowsIndex::Match(node.Get())); if (!EnsureListType(*input, ctx)) { - return TStatus::Error; - } + return TStatus::Error; + } itemType = input->GetTypeAnn()->Cast<TListExprType>()->GetItemType(); - isStream = false; - } - + isStream = false; + } + if (!EnsureStructType(input->Pos(), *itemType, ctx)) { - return TStatus::Error; - } - - auto rowType = itemType->Cast<TStructExprType>(); + return TStatus::Error; + } + + auto rowType = itemType->Cast<TStructExprType>(); for (auto& keyColumnName : table.second->Metadata->KeyColumnNames) { - if (!rowType->FindItem(keyColumnName)) { + if (!rowType->FindItem(keyColumnName)) { ctx.AddError(YqlIssue(ctx.GetPosition(node->Pos()), TIssuesIds::KIKIMR_PRECONDITION_FAILED, TStringBuilder() << "Missing key column in input type: " << keyColumnName)); - return TStatus::Error; - } - } - + return TStatus::Error; + } + } + if (rowType->GetItems().size() != table.second->Metadata->KeyColumnNames.size()) { ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "Input type contains non-key columns")); return TStatus::Error; - } - - auto effectType = MakeKqpEffectType(ctx); - if (isStream) { + } + + auto effectType = MakeKqpEffectType(ctx); + if (isStream) { node->SetTypeAnn(ctx.MakeType<TStreamExprType>(effectType)); - } else { + } else { node->SetTypeAnn(ctx.MakeType<TListExprType>(effectType)); - } - return TStatus::Ok; -} - + } + return TStatus::Ok; +} + TStatus AnnotateOlapFilter(const TExprNode::TPtr& node, TExprContext& ctx) { if (!EnsureArgsCount(*node, 2, ctx)) { return TStatus::Error; @@ -1092,8 +1092,8 @@ TAutoPtr<IGraphTransformer> CreateKqpTypeAnnotationTransformer(const TString& cl if (TKqpCnMapShard::Match(input.Get()) || TKqpCnShuffleShard::Match(input.Get())) { return AnnotateDqConnection(input, ctx); - } - + } + if (TKqpTxResultBinding::Match(input.Get())) { return AnnotateKqpTxResultBinding(input, ctx); } diff --git a/ydb/core/kqp/runtime/kqp_program_builder.cpp b/ydb/core/kqp/runtime/kqp_program_builder.cpp index 
b5169c19d8..3ce7ee90ec 100644 --- a/ydb/core/kqp/runtime/kqp_program_builder.cpp +++ b/ydb/core/kqp/runtime/kqp_program_builder.cpp @@ -222,16 +222,16 @@ TRuntimeNode TKqpProgramBuilder::KqpUpsertRows(const TTableId& tableId, const TR return TRuntimeNode(builder.Build(), false); } -TRuntimeNode TKqpProgramBuilder::KqpDeleteRows(const TTableId& tableId, const TRuntimeNode& rows) { - auto returnType = NewStreamType(NewResourceType(NYql::KqpEffectTag)); - - TCallableBuilder builder(Env, __func__, returnType); - builder.Add(BuildTableIdLiteral(tableId, *this)); - builder.Add(rows); - - return TRuntimeNode(builder.Build(), false); -} - +TRuntimeNode TKqpProgramBuilder::KqpDeleteRows(const TTableId& tableId, const TRuntimeNode& rows) { + auto returnType = NewStreamType(NewResourceType(NYql::KqpEffectTag)); + + TCallableBuilder builder(Env, __func__, returnType); + builder.Add(BuildTableIdLiteral(tableId, *this)); + builder.Add(rows); + + return TRuntimeNode(builder.Build(), false); +} + TRuntimeNode TKqpProgramBuilder::KqpEffects(const TArrayRef<const TRuntimeNode>& effects) { auto returnType = NewStreamType(NewResourceType(NYql::KqpEffectTag)); TCallableBuilder builder(Env, __func__, returnType); diff --git a/ydb/core/kqp/ut/kqp_newengine_ut.cpp b/ydb/core/kqp/ut/kqp_newengine_ut.cpp index 3737687d8d..6656827c40 100644 --- a/ydb/core/kqp/ut/kqp_newengine_ut.cpp +++ b/ydb/core/kqp/ut/kqp_newengine_ut.cpp @@ -1634,98 +1634,98 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { ])", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(Delete) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); + Y_UNIT_TEST(Delete) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); - NYdb::NTable::TExecDataQuerySettings execSettings; - execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); - - auto result = session.ExecuteDataQuery(R"( - 
PRAGMA kikimr.UseNewEngine = "true"; + NYdb::NTable::TExecDataQuerySettings execSettings; + execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); + auto result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + DELETE FROM [/Root/TwoShard] WHERE Value2 = -1; - )", TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); + )", TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - + // Phase reading rows to delete - UNIT_ASSERT(stats.query_phases(0).duration_us() > 0); + UNIT_ASSERT(stats.query_phases(0).duration_us() > 0); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TwoShard"); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 6); - + // Phase deleting rows - UNIT_ASSERT(stats.query_phases(1).duration_us() > 0); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).name(), "/Root/TwoShard"); + UNIT_ASSERT(stats.query_phases(1).duration_us() > 0); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).name(), "/Root/TwoShard"); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).deletes().rows(), 2); - - result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + + result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + SELECT * FROM [/Root/TwoShard] ORDER BY Key; 
- )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - CompareYson(R"([ - [[2u];["Two"];[0]]; - [[3u];["Three"];[1]]; - [[4000000002u];["BigTwo"];[0]]; - [[4000000003u];["BigThree"];[1]] - ])", FormatResultSetYson(result.GetResultSet(0))); - } - - Y_UNIT_TEST(DeleteOn) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - NYdb::NTable::TExecDataQuerySettings execSettings; - execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); - - auto result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + CompareYson(R"([ + [[2u];["Two"];[0]]; + [[3u];["Three"];[1]]; + [[4000000002u];["BigTwo"];[0]]; + [[4000000003u];["BigThree"];[1]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + Y_UNIT_TEST(DeleteOn) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + NYdb::NTable::TExecDataQuerySettings execSettings; + execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); + + auto result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + DELETE FROM [/Root/TwoShard] ON SELECT * FROM [/Root/TwoShard] WHERE Value2 = 1; - )", TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); + )", TTxControl::BeginTx().CommitTx(), execSettings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); 
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - + // Phase reading rows to delete - UNIT_ASSERT(stats.query_phases(0).duration_us() > 0); + UNIT_ASSERT(stats.query_phases(0).duration_us() > 0); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TwoShard"); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 6); - + // Phase deleting rows - UNIT_ASSERT(stats.query_phases(1).duration_us() > 0); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).name(), "/Root/TwoShard"); + UNIT_ASSERT(stats.query_phases(1).duration_us() > 0); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).name(), "/Root/TwoShard"); UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).deletes().rows(), 2); - - result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + + result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + SELECT * FROM [/Root/TwoShard] ORDER BY Key; - )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - CompareYson(R"([ - [[1u];["One"];[-1]]; - [[2u];["Two"];[0]]; - [[4000000001u];["BigOne"];[-1]]; - [[4000000002u];["BigTwo"];[0]]; - ])", FormatResultSetYson(result.GetResultSet(0))); - } - + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + CompareYson(R"([ + [[1u];["One"];[-1]]; + [[2u];["Two"];[0]]; + [[4000000001u];["BigOne"];[-1]]; + [[4000000002u];["BigTwo"];[0]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } + Y_UNIT_TEST(MultiEffects) { TKikimrRunner kikimr; auto db = 
kikimr.GetTableClient(); diff --git a/ydb/core/persqueue/type_codecs_defs.cpp b/ydb/core/persqueue/type_codecs_defs.cpp index f333be5cfc..7b8ca7872f 100644 --- a/ydb/core/persqueue/type_codecs_defs.cpp +++ b/ydb/core/persqueue/type_codecs_defs.cpp @@ -112,8 +112,8 @@ TTypeCodecs::TTypeCodecs(TTypeId typeId) { case NTypeIds::String2m: case NTypeIds::Utf8: case NTypeIds::Json: - case NTypeIds::JsonDocument: - case NTypeIds::DyNumber: + case NTypeIds::JsonDocument: + case NTypeIds::DyNumber: InitDefaults(this, TCodecType::VarLen); break; diff --git a/ydb/core/protos/kqp_physical.proto b/ydb/core/protos/kqp_physical.proto index 44d8f58716..593a3f93d7 100644 --- a/ydb/core/protos/kqp_physical.proto +++ b/ydb/core/protos/kqp_physical.proto @@ -118,10 +118,10 @@ message TKqpPhyOpUpsertRows { TKqpPhyValue RowsValue = 1; } -message TKqpPhyOpDeleteRows { - TKqpPhyValue RowsValue = 1; -} - +message TKqpPhyOpDeleteRows { + TKqpPhyValue RowsValue = 1; +} + message TKqpPhyOpLookup { TKqpPhyValue KeysValue = 1; } @@ -164,7 +164,7 @@ message TKqpPhyTableOperation { oneof Type { TKqpPhyOpReadRange ReadRange = 3; TKqpPhyOpUpsertRows UpsertRows = 4; - TKqpPhyOpDeleteRows DeleteRows = 5; + TKqpPhyOpDeleteRows DeleteRows = 5; TKqpPhyOpLookup Lookup = 6; TKqpPhyOpReadOlapRanges ReadOlapRange = 7; TKqpPhyOpReadRanges ReadRanges = 8; diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h index bccfdca97d..e6d9dae46a 100644 --- a/ydb/core/scheme/scheme_tablecell.h +++ b/ydb/core/scheme/scheme_tablecell.h @@ -163,9 +163,9 @@ inline int CompareTypedCells(const TCell& a, const TCell& b, NScheme::TTypeIdOrd case NKikimr::NScheme::NTypeIds::Utf8: case NKikimr::NScheme::NTypeIds::Json: case NKikimr::NScheme::NTypeIds::Yson: - // XXX: using memcmp is meaningless for both JsonDocument and Json - case NKikimr::NScheme::NTypeIds::JsonDocument: - case NKikimr::NScheme::NTypeIds::DyNumber: + // XXX: using memcmp is meaningless for both JsonDocument and Json + case 
NKikimr::NScheme::NTypeIds::JsonDocument: + case NKikimr::NScheme::NTypeIds::DyNumber: { const char* pa = (const char*)a.Data(); const char* pb = (const char*)b.Data(); @@ -268,8 +268,8 @@ inline ui64 GetValueHash(NScheme::TTypeId type, const TCell& cell) { case NYql::NProto::TypeIds::Yson: case NYql::NProto::TypeIds::Json: case NYql::NProto::TypeIds::Decimal: - case NYql::NProto::TypeIds::JsonDocument: - case NYql::NProto::TypeIds::DyNumber: + case NYql::NProto::TypeIds::JsonDocument: + case NYql::NProto::TypeIds::DyNumber: return ComputeHash(TStringBuf{cell.Data(), cell.Size()}); default: diff --git a/ydb/core/scheme/scheme_tablecell_ut.cpp b/ydb/core/scheme/scheme_tablecell_ut.cpp index 5bd4a27ab9..8de5848aa0 100644 --- a/ydb/core/scheme/scheme_tablecell_ut.cpp +++ b/ydb/core/scheme/scheme_tablecell_ut.cpp @@ -323,8 +323,8 @@ Y_UNIT_TEST_SUITE(Scheme) { case NScheme::NTypeIds::Utf8: case NScheme::NTypeIds::Yson: case NScheme::NTypeIds::Json: - case NScheme::NTypeIds::JsonDocument: - case NScheme::NTypeIds::DyNumber: + case NScheme::NTypeIds::JsonDocument: + case NScheme::NTypeIds::DyNumber: GetValueHash(typeId, TCell(charArr, 30)); CompareTypedCells(TCell(charArr, 30), TCell(charArr, 30), typeId); break; diff --git a/ydb/core/scheme_types/scheme_type_registry.cpp b/ydb/core/scheme_types/scheme_type_registry.cpp index d737254361..8ad962b874 100644 --- a/ydb/core/scheme_types/scheme_type_registry.cpp +++ b/ydb/core/scheme_types/scheme_type_registry.cpp @@ -29,13 +29,13 @@ TTypeRegistry::TTypeRegistry() RegisterType<TUtf8>(); RegisterType<TYson>(); RegisterType<TJson>(); - RegisterType<TJsonDocument>(); + RegisterType<TJsonDocument>(); RegisterType<TDecimal>(); RegisterType<TDate>(); RegisterType<TDatetime>(); RegisterType<TTimestamp>(); RegisterType<TInterval>(); - RegisterType<TDyNumber>(); + RegisterType<TDyNumber>(); } void TTypeRegistry::CalculateMetadataEtag() { diff --git a/ydb/core/scheme_types/scheme_type_traits.h 
b/ydb/core/scheme_types/scheme_type_traits.h index 7e07f95976..e8c25cfc22 100644 --- a/ydb/core/scheme_types/scheme_type_traits.h +++ b/ydb/core/scheme_types/scheme_type_traits.h @@ -48,8 +48,8 @@ constexpr bool IsStringType(TTypeId id) noexcept { || NTypeIds::Utf8 == id || NTypeIds::Yson == id || NTypeIds::Json == id - || NTypeIds::JsonDocument == id - || NTypeIds::DyNumber + || NTypeIds::JsonDocument == id + || NTypeIds::DyNumber ; } diff --git a/ydb/core/scheme_types/scheme_types_defs.cpp b/ydb/core/scheme_types/scheme_types_defs.cpp index eeaba2e9af..d277716382 100644 --- a/ydb/core/scheme_types/scheme_types_defs.cpp +++ b/ydb/core/scheme_types/scheme_types_defs.cpp @@ -23,7 +23,7 @@ namespace NNames { DECLARE_TYPED_TYPE_NAME(Utf8); DECLARE_TYPED_TYPE_NAME(Yson); DECLARE_TYPED_TYPE_NAME(Json); - DECLARE_TYPED_TYPE_NAME(JsonDocument); + DECLARE_TYPED_TYPE_NAME(JsonDocument); DECLARE_TYPED_TYPE_NAME(Decimal); @@ -31,8 +31,8 @@ namespace NNames { DECLARE_TYPED_TYPE_NAME(Datetime); DECLARE_TYPED_TYPE_NAME(Timestamp); DECLARE_TYPED_TYPE_NAME(Interval); - - DECLARE_TYPED_TYPE_NAME(DyNumber); + + DECLARE_TYPED_TYPE_NAME(DyNumber); } void WriteEscapedValue(IOutputStream &out, const char *data, size_t size) { diff --git a/ydb/core/scheme_types/scheme_types_defs.h b/ydb/core/scheme_types/scheme_types_defs.h index bb06b09702..7d4b9f2a50 100644 --- a/ydb/core/scheme_types/scheme_types_defs.h +++ b/ydb/core/scheme_types/scheme_types_defs.h @@ -160,8 +160,8 @@ namespace NNames { extern const char Utf8[5]; extern const char Yson[5]; extern const char Json[5]; - extern const char JsonDocument[13]; - extern const char DyNumber[9]; + extern const char JsonDocument[13]; + extern const char DyNumber[9]; } void WriteEscapedValue(IOutputStream &out, const char *data, size_t size); @@ -180,14 +180,14 @@ class TJson : public TStringBase<TJson, NTypeIds::Json, NNames::Json> { public: }; -class TJsonDocument : public TStringBase<TJsonDocument, NTypeIds::JsonDocument, 
NNames::JsonDocument> { -public: -}; - -class TDyNumber : public TStringBase<TDyNumber, NTypeIds::DyNumber, NNames::DyNumber> { -public: -}; - +class TJsonDocument : public TStringBase<TJsonDocument, NTypeIds::JsonDocument, NNames::JsonDocument> { +public: +}; + +class TDyNumber : public TStringBase<TDyNumber, NTypeIds::DyNumber, NNames::DyNumber> { +public: +}; + template <ui32 TMaxSize, TTypeId TypeId, const char* Name> class TBoundedString : public TStringBase<TBoundedString<TMaxSize, TypeId, Name>, TypeId, Name> { public: @@ -248,13 +248,13 @@ class TInterval : public IIntegerTypeWithKeyString<i64, NTypeIds::Interval, NNam xx(Utf8, TUtf8, __VA_ARGS__) \ xx(Yson, TYson, __VA_ARGS__) \ xx(Json, TJson, __VA_ARGS__) \ - xx(JsonDocument, TJsonDocument, __VA_ARGS__) \ + xx(JsonDocument, TJsonDocument, __VA_ARGS__) \ xx(Decimal, TDecimal, __VA_ARGS__) \ xx(Date, TDate, __VA_ARGS__) \ xx(Datetime, TDatetime, __VA_ARGS__) \ xx(Timestamp, TTimestamp, __VA_ARGS__) \ xx(Interval, TInterval, __VA_ARGS__) \ - xx(DyNumber, TDyNumber, __VA_ARGS__) \ + xx(DyNumber, TDyNumber, __VA_ARGS__) \ /**/ diff --git a/ydb/core/tablet_flat/flat_executor_db_mon.cpp b/ydb/core/tablet_flat/flat_executor_db_mon.cpp index d5353747be..26dc9697d8 100644 --- a/ydb/core/tablet_flat/flat_executor_db_mon.cpp +++ b/ydb/core/tablet_flat/flat_executor_db_mon.cpp @@ -1,8 +1,8 @@ #include "flat_executor.h" - + #include <ydb/library/binary_json/read.h> #include <ydb/library/dynumber/dynumber.h> - + #include <util/stream/hex.h> #include <util/string/escape.h> #include <library/cpp/html/pcdata/pcdata.h> @@ -211,16 +211,16 @@ public: case NScheme::NTypeIds::Json: str << EncodeHtmlPcdata(TStringBuf((const char*)data, size)); break; - case NScheme::NTypeIds::JsonDocument: { - const auto json = NBinaryJson::SerializeToJson(TStringBuf((const char*)data, size)); - str << "(JsonDocument) " << EncodeHtmlPcdata(json); - break; - } - case NScheme::NTypeIds::DyNumber: { - const auto number = 
NDyNumber::DyNumberToString(TStringBuf((const char*)data, size)); - str << "(DyNumber) " << number; - break; - } + case NScheme::NTypeIds::JsonDocument: { + const auto json = NBinaryJson::SerializeToJson(TStringBuf((const char*)data, size)); + str << "(JsonDocument) " << EncodeHtmlPcdata(json); + break; + } + case NScheme::NTypeIds::DyNumber: { + const auto number = NDyNumber::DyNumberToString(TStringBuf((const char*)data, size)); + str << "(DyNumber) " << number; + break; + } default: str << "<i>unknown type " << tuple.Types[i] << "</i>"; break; diff --git a/ydb/core/tx/datashard/datashard_kqp_compute.cpp b/ydb/core/tx/datashard/datashard_kqp_compute.cpp index edd6ca8e19..248e107628 100644 --- a/ydb/core/tx/datashard/datashard_kqp_compute.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_compute.cpp @@ -31,7 +31,7 @@ struct TKqpDatashardComputationMap { Map["KqpWideReadTableRanges"] = &WrapKqpWideReadTableRanges; Map["KqpLookupTable"] = &WrapKqpLookupTable; Map["KqpUpsertRows"] = &WrapKqpUpsertRows; - Map["KqpDeleteRows"] = &WrapKqpDeleteRows; + Map["KqpDeleteRows"] = &WrapKqpDeleteRows; Map["KqpEffects"] = &WrapKqpEffects; } diff --git a/ydb/core/tx/datashard/datashard_kqp_compute.h b/ydb/core/tx/datashard/datashard_kqp_compute.h index 4bbb8fa0de..5b3c64407c 100644 --- a/ydb/core/tx/datashard/datashard_kqp_compute.h +++ b/ydb/core/tx/datashard/datashard_kqp_compute.h @@ -98,8 +98,8 @@ IComputationNode* WrapKqpLookupTable(TCallable& callable, const TComputationNode TKqpDatashardComputeContext& computeCtx); IComputationNode* WrapKqpUpsertRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpDatashardComputeContext& computeCtx); -IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpDatashardComputeContext& computeCtx); +IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpDatashardComputeContext& computeCtx); IComputationNode* 
WrapKqpEffects(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpDatashardComputeContext& computeCtx); IComputationNode* WrapKqpWideReadTable(TCallable& callable, const TComputationNodeFactoryContext& ctx, diff --git a/ydb/core/tx/datashard/datashard_kqp_delete_rows.cpp b/ydb/core/tx/datashard/datashard_kqp_delete_rows.cpp index 6a6c3d8231..d2912b3025 100644 --- a/ydb/core/tx/datashard/datashard_kqp_delete_rows.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_delete_rows.cpp @@ -1,53 +1,53 @@ -#include "datashard_kqp_compute.h" - +#include "datashard_kqp_compute.h" + #include <ydb/core/engine/mkql_keys.h> #include <ydb/core/engine/mkql_engine_flat_host.h> #include <ydb/core/kqp/runtime/kqp_runtime_impl.h> - + #include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> #include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h> #include <ydb/library/yql/minikql/mkql_node.h> #include <ydb/library/yql/minikql/mkql_node_cast.h> - -#include <util/generic/cast.h> - -namespace NKikimr { -namespace NMiniKQL { - -using namespace NTable; -using namespace NUdf; - -namespace { - -class TKqpDeleteRowsWrapper : public TMutableComputationNode<TKqpDeleteRowsWrapper> { - using TBase = TMutableComputationNode<TKqpDeleteRowsWrapper>; - -public: - class TRowResult : public TComputationValue<TRowResult> { - using TBase = TComputationValue<TRowResult>; - - public: - TRowResult(TMemoryUsageInfo* memInfo, const TKqpDeleteRowsWrapper& owner, - NUdf::TUnboxedValue&& row) - : TBase(memInfo) - , Owner(owner) - , Row(std::move(row)) {} - - private: - void Apply(NUdf::IApplyContext& applyContext) const override { - auto& engineCtx = *CheckedCast<TKqpDatashardApplyContext*>(&applyContext); - + +#include <util/generic/cast.h> + +namespace NKikimr { +namespace NMiniKQL { + +using namespace NTable; +using namespace NUdf; + +namespace { + +class TKqpDeleteRowsWrapper : public TMutableComputationNode<TKqpDeleteRowsWrapper> { + using TBase = 
TMutableComputationNode<TKqpDeleteRowsWrapper>; + +public: + class TRowResult : public TComputationValue<TRowResult> { + using TBase = TComputationValue<TRowResult>; + + public: + TRowResult(TMemoryUsageInfo* memInfo, const TKqpDeleteRowsWrapper& owner, + NUdf::TUnboxedValue&& row) + : TBase(memInfo) + , Owner(owner) + , Row(std::move(row)) {} + + private: + void Apply(NUdf::IApplyContext& applyContext) const override { + auto& engineCtx = *CheckedCast<TKqpDatashardApplyContext*>(&applyContext); + TVector<TCell> keyTuple(Owner.KeyIndices.size()); FillKeyTupleValue(Row, Owner.KeyIndices, Owner.RowTypes, keyTuple, Owner.Env); - if (engineCtx.Host->IsPathErased(Owner.TableId)) { - return; - } - + if (engineCtx.Host->IsPathErased(Owner.TableId)) { + return; + } + if (!engineCtx.Host->IsMyKey(Owner.TableId, keyTuple)) { - return; - } - + return; + } + ui64 nEraseRow = Owner.ShardTableStats.NEraseRow; engineCtx.Host->EraseRow(Owner.TableId, keyTuple); @@ -55,86 +55,86 @@ public: if (i64 delta = Owner.ShardTableStats.NEraseRow - nEraseRow; delta > 0) { Owner.TaskTableStats.NEraseRow += delta; } - }; - - private: - const TKqpDeleteRowsWrapper& Owner; - NUdf::TUnboxedValue Row; - }; - - class TRowsResult : public TComputationValue<TRowsResult> { - using TBase = TComputationValue<TRowsResult>; - - public: - TRowsResult(TMemoryUsageInfo* memInfo, const TKqpDeleteRowsWrapper& owner, - NUdf::TUnboxedValue&& rows) - : TBase(memInfo) - , Owner(owner) - , Rows(std::move(rows)) {} - - NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) final { - NUdf::TUnboxedValue row; - auto status = Rows.Fetch(row); - - if (status == NUdf::EFetchStatus::Ok) { - result = NUdf::TUnboxedValuePod(new TRowResult(GetMemInfo(), Owner, std::move(row))); - } - - return status; - } - - private: - const TKqpDeleteRowsWrapper& Owner; - NUdf::TUnboxedValue Rows; - }; - - NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const { - return ctx.HolderFactory.Create<TRowsResult>(*this, 
RowsNode->GetValue(ctx)); - } - -public: + }; + + private: + const TKqpDeleteRowsWrapper& Owner; + NUdf::TUnboxedValue Row; + }; + + class TRowsResult : public TComputationValue<TRowsResult> { + using TBase = TComputationValue<TRowsResult>; + + public: + TRowsResult(TMemoryUsageInfo* memInfo, const TKqpDeleteRowsWrapper& owner, + NUdf::TUnboxedValue&& rows) + : TBase(memInfo) + , Owner(owner) + , Rows(std::move(rows)) {} + + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) final { + NUdf::TUnboxedValue row; + auto status = Rows.Fetch(row); + + if (status == NUdf::EFetchStatus::Ok) { + result = NUdf::TUnboxedValuePod(new TRowResult(GetMemInfo(), Owner, std::move(row))); + } + + return status; + } + + private: + const TKqpDeleteRowsWrapper& Owner; + NUdf::TUnboxedValue Rows; + }; + + NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const { + return ctx.HolderFactory.Create<TRowsResult>(*this, RowsNode->GetValue(ctx)); + } + +public: TKqpDeleteRowsWrapper(TComputationMutables& mutables, TKqpDatashardComputeContext& computeCtx, const TTableId& tableId, IComputationNode* rowsNode, TVector<NUdf::TDataTypeId> rowTypes, TVector<ui32> keyIndices, const TTypeEnvironment& env) - : TBase(mutables) - , TableId(tableId) - , RowsNode(rowsNode) + : TBase(mutables) + , TableId(tableId) + , RowsNode(rowsNode) , RowTypes(std::move(rowTypes)) , KeyIndices(std::move(keyIndices)) - , Env(env) + , Env(env) , ShardTableStats(computeCtx.GetDatashardCounters()) , TaskTableStats(computeCtx.GetTaskCounters(computeCtx.GetCurrentTaskId())) {} - -private: - void RegisterDependencies() const final { - DependsOn(RowsNode); - } - -private: - TTableId TableId; - IComputationNode* RowsNode; + +private: + void RegisterDependencies() const final { + DependsOn(RowsNode); + } + +private: + TTableId TableId; + IComputationNode* RowsNode; const TVector<NUdf::TDataTypeId> RowTypes; const TVector<ui32> KeyIndices; - const TTypeEnvironment& Env; + const TTypeEnvironment& Env; TKqpTableStats& 
ShardTableStats; TKqpTableStats& TaskTableStats; -}; - -} // namespace - -IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpDatashardComputeContext& computeCtx) -{ - MKQL_ENSURE_S(callable.GetInputsCount() == 2); - - auto tableNode = callable.GetInput(0); - auto rowsNode = callable.GetInput(1); - - auto tableId = NKqp::ParseTableId(tableNode); - auto localTableId = computeCtx.GetLocalTableId(tableId); - MKQL_ENSURE_S(localTableId); +}; + +} // namespace + +IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpDatashardComputeContext& computeCtx) +{ + MKQL_ENSURE_S(callable.GetInputsCount() == 2); + + auto tableNode = callable.GetInput(0); + auto rowsNode = callable.GetInput(1); + + auto tableId = NKqp::ParseTableId(tableNode); + auto localTableId = computeCtx.GetLocalTableId(tableId); + MKQL_ENSURE_S(localTableId); auto tableKeyTypes = computeCtx.GetKeyColumnsInfo(tableId); - - auto rowType = AS_TYPE(TStructType, AS_TYPE(TStreamType, rowsNode.GetStaticType())->GetItemType()); + + auto rowType = AS_TYPE(TStructType, AS_TYPE(TStreamType, rowsNode.GetStaticType())->GetItemType()); MKQL_ENSURE_S(tableKeyTypes.size() == rowType->GetMembersCount(), "Table key column count mismatch" << ", expected: " << tableKeyTypes.size() << ", actual: " << rowType->GetMembersCount()); @@ -152,7 +152,7 @@ IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeF rowTypes[i] = typeId; } - + TVector<ui32> keyIndices(tableKeyTypes.size()); for (ui32 i = 0; i < tableKeyTypes.size(); i++) { auto it = inputIndex.find(tableKeyTypes[i].second); @@ -165,9 +165,9 @@ IComputationNode* WrapKqpDeleteRows(TCallable& callable, const TComputationNodeF keyIndices[i] = it->second; } - return new TKqpDeleteRowsWrapper(ctx.Mutables, computeCtx, tableId, + return new TKqpDeleteRowsWrapper(ctx.Mutables, computeCtx, tableId, LocateNode(ctx.NodeLocator, *rowsNode.GetNode()), 
std::move(rowTypes), std::move(keyIndices), ctx.Env); -} - -} // namespace NMiniKQL -} // namespace NKikimr +} + +} // namespace NMiniKQL +} // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_kqp_upsert_rows.cpp b/ydb/core/tx/datashard/datashard_kqp_upsert_rows.cpp index a2dfa61bbf..68d88ee398 100644 --- a/ydb/core/tx/datashard/datashard_kqp_upsert_rows.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_upsert_rows.cpp @@ -229,5 +229,5 @@ IComputationNode* WrapKqpUpsertRows(TCallable& callable, const TComputationNodeF std::move(upsertColumns), ctx.Env); } -} // namespace NMiniKQL +} // namespace NMiniKQL } // namespace NKikimr diff --git a/ydb/core/tx/datashard/read_table_scan.cpp b/ydb/core/tx/datashard/read_table_scan.cpp index 63e94fde96..d1560f578f 100644 --- a/ydb/core/tx/datashard/read_table_scan.cpp +++ b/ydb/core/tx/datashard/read_table_scan.cpp @@ -8,7 +8,7 @@ #include <ydb/library/binary_json/read.h> #include <ydb/library/dynumber/dynumber.h> - + //#include <library/cpp/actors/interconnect/interconnect.h> //#include <util/generic/cast.h> @@ -92,17 +92,17 @@ Y_FORCE_INLINE void AddCell(TOutValue& row, NScheme::TTypeId type, const TCell & case NUdf::TDataType<NUdf::TInterval>::Id: val.set_int64_value(cell.AsValue<i64>()); break; - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - const auto json = NBinaryJson::SerializeToJson(TStringBuf(cell.Data(), cell.Size())); - val.set_text_value(json); - break; - } - case NUdf::TDataType<NUdf::TDyNumber>::Id: { - const auto number = NDyNumber::DyNumberToString(TStringBuf(cell.Data(), cell.Size())); - Y_VERIFY(number.Defined(), "Invalid DyNumber binary representation"); - val.set_text_value(*number); - break; - } + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + const auto json = NBinaryJson::SerializeToJson(TStringBuf(cell.Data(), cell.Size())); + val.set_text_value(json); + break; + } + case NUdf::TDataType<NUdf::TDyNumber>::Id: { + const auto number = NDyNumber::DyNumberToString(TStringBuf(cell.Data(), 
cell.Size())); + Y_VERIFY(number.Defined(), "Invalid DyNumber binary representation"); + val.set_text_value(*number); + break; + } default: val.set_bytes_value(cell.Data(), cell.Size()); } diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index 9c98e8e79b..fbe5f38546 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -111,7 +111,7 @@ SRCS( datashard_kqp_lookup_table.cpp datashard_kqp_read_table.cpp datashard_kqp_upsert_rows.cpp - datashard_kqp_delete_rows.cpp + datashard_kqp_delete_rows.cpp datashard_kqp.cpp datashard_kqp.h datashard_repl_apply.cpp diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index de7d17f316..938aced659 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -27,7 +27,7 @@ inline bool IsAllowedKeyType(NScheme::TTypeId typeId) { case NScheme::NTypeIds::Yson: case NScheme::NTypeIds::Float: case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::JsonDocument: + case NScheme::NTypeIds::JsonDocument: return false; default: return true; diff --git a/ydb/core/ydb_convert/ydb_convert.cpp b/ydb/core/ydb_convert/ydb_convert.cpp index 11c804cff5..2b36a96502 100644 --- a/ydb/core/ydb_convert/ydb_convert.cpp +++ b/ydb/core/ydb_convert/ydb_convert.cpp @@ -6,7 +6,7 @@ #include <ydb/library/binary_json/read.h> #include <ydb/library/binary_json/write.h> #include <ydb/library/dynumber/dynumber.h> - + #include <ydb/library/yql/minikql/dom/json.h> #include <ydb/library/yql/minikql/dom/yson.h> #include <ydb/library/yql/public/udf/udf_types.h> @@ -268,17 +268,17 @@ Y_FORCE_INLINE void ConvertData(NUdf::TDataTypeId typeId, const NKikimrMiniKQL:: res.set_high_128(value.GetHi128()); break; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - const auto json = NBinaryJson::SerializeToJson(value.GetBytes()); - res.set_text_value(json); - break; - } - case NUdf::TDataType<NUdf::TDyNumber>::Id: { - const 
auto number = NDyNumber::DyNumberToString(value.GetBytes()); - Y_ENSURE(number.Defined(), "Invalid DyNumber binary representation"); - res.set_text_value(*number); - break; - } + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + const auto json = NBinaryJson::SerializeToJson(value.GetBytes()); + res.set_text_value(json); + break; + } + case NUdf::TDataType<NUdf::TDyNumber>::Id: { + const auto number = NDyNumber::DyNumberToString(value.GetBytes()); + Y_ENSURE(number.Defined(), "Invalid DyNumber binary representation"); + res.set_text_value(*number); + break; + } default: const auto& stringRef = value.GetBytes(); res.set_bytes_value(stringRef.data(), stringRef.size()); @@ -406,24 +406,24 @@ Y_FORCE_INLINE void ConvertData(NUdf::TDataTypeId typeId, const Ydb::Value& valu res.SetLow128(value.low_128()); res.SetHi128(value.high_128()); break; - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { CheckTypeId(value.value_case(), Ydb::Value::kTextValue, "JsonDocument"); - const auto binaryJson = NBinaryJson::SerializeToBinaryJson(value.text_value()); + const auto binaryJson = NBinaryJson::SerializeToBinaryJson(value.text_value()); if (!binaryJson.Defined()) { throw yexception() << "Invalid JsonDocument value"; } - res.SetBytes(binaryJson->Data(), binaryJson->Size()); - break; - } - case NUdf::TDataType<NUdf::TDyNumber>::Id: { + res.SetBytes(binaryJson->Data(), binaryJson->Size()); + break; + } + case NUdf::TDataType<NUdf::TDyNumber>::Id: { CheckTypeId(value.value_case(), Ydb::Value::kTextValue, "DyNumber"); - const auto dyNumber = NDyNumber::ParseDyNumberString(value.text_value()); + const auto dyNumber = NDyNumber::ParseDyNumberString(value.text_value()); if (!dyNumber.Defined()) { throw yexception() << "Invalid DyNumber value"; } - res.SetBytes(dyNumber->Data(), dyNumber->Size()); - break; - } + res.SetBytes(dyNumber->Data(), dyNumber->Size()); + break; + } case NUdf::TDataType<char*>::Id: { CheckTypeId(value.value_case(), 
Ydb::Value::kBytesValue, "String"); const auto& stringRef = value.bytes_value(); diff --git a/ydb/library/backup/query_builder.cpp b/ydb/library/backup/query_builder.cpp index dd931c82bb..afb07959ff 100644 --- a/ydb/library/backup/query_builder.cpp +++ b/ydb/library/backup/query_builder.cpp @@ -175,19 +175,19 @@ void TQueryBuilder::AddPrimitiveMember(EPrimitiveType type, TStringBuf buf) { Value.OptionalJson(TryParse<TString>(buf)); break; - case EPrimitiveType::JsonDocument: - Value.OptionalJsonDocument(TryParse<TString>(buf)); - break; + case EPrimitiveType::JsonDocument: + Value.OptionalJsonDocument(TryParse<TString>(buf)); + break; - case EPrimitiveType::DyNumber: + case EPrimitiveType::DyNumber: if (buf == "null") { Value.OptionalDyNumber(Nothing()); } else { Y_ENSURE(NKikimr::NDyNumber::IsValidDyNumberString(buf)); Value.OptionalDyNumber(TString(buf)); } - break; - + break; + case EPrimitiveType::Uuid: Y_ENSURE(false, TStringBuilder() << "Unexpected Primitive kind while parsing line: " << type); break; diff --git a/ydb/library/binary_json/README.md b/ydb/library/binary_json/README.md index cb126f2f3b..8a4f0bc293 100644 --- a/ydb/library/binary_json/README.md +++ b/ydb/library/binary_json/README.md @@ -1,220 +1,220 @@ -# BinaryJson Design Doc - +# BinaryJson Design Doc + ## Introduction - + BinaryJson is on-disk binary format for JSON. Its main characteristics are the following: - Access to values inside JSON document without document parsing; - Minimal effort to value deserialization. - + ## Main Idea - + Let's separate storing values of JSON document and document's structure. Document's structure would be represented as sequence of fixed size entries, each entry describes a node in the JSON document. Simple type values would be stored inside these entries, complex type values would be stored in special indexes. We build a dictionary of document's string values to operate string indexes instead of strings themselves. 
- + ## Data Structures - + BinaryJson contains the following parts: - -``` -+--------+------+--------------+--------------+ -| Header | Tree | String index | Number index | -+--------+------+--------------+--------------+ -``` - + +``` ++--------+------+--------------+--------------+ +| Header | Tree | String index | Number index | ++--------+------+--------------+--------------+ +``` + - `Header` - metadata about BinaryJson - `Tree` - store documents structure - `String index` - a place to store all string values - `Number index` - a place to store all numbers - -### Header - -`Header ` хранит метаинформацию о BinaryJson документе. - -Структура: - -``` -+----------------+-----------------------------+ -| Version, 5 бит | String index offset, 27 бит | -+----------------+-----------------------------+ -``` - -- `Version` - номер версии BinaryJson. Всегда равен `1` -- `String index offset` - сдвиг на начало `String index` - -### Tree - -Дерево JSON документа, где каждый узел представлен структурой `Entry`, `KeyEntry` или `Meta`. - -#### Entry - -`Entry` - это `uint32_t`, представляющий узел в дереве JSON. `Entry` может в зависимости от типа: -- Хранить значение в себе (для простых типов вроде `boolean` и `null`) -- Указывать на другой узел дерева (для массивов и объектов) -- Указывать на элемент в `String index` или `Number index` (для строк и чисел) - -`Entry` имеет следующую структуру: -``` -+-------------------+---------------+ -| Entry type, 5 бит | Value, 27 бит | -+-------------------+---------------+ -``` - -- `Entry type`. Задает тип значения: - - `0` - bool значение `false` - - `1` - bool значение `true` - - `2` - значение `null` - - `3` - строка - - `4` - число - - `5` - массив или объект - - Остальные типы зарезервированы на будущее -- `Value`. 
В зависимости от типа: - - Сдвиг указывающий на начало `SEntry` в `String index` (для строк) - - Сдвиг указывающий на начало числа в `Number index` (для чисел) - - Сдвиг указывающий на начало структуры `Meta` (для массивов и объектов) - - Для остальных типов не определено - -#### KeyEntry - -`KeyEntry` - это `uint32_t`, являющийся сдвигом на начало `SEntry` в `String index`. Указывает на строку которая хранит ключ объекта - -#### Meta - -`Meta` - это `uint32_t` хранящий в себе тип контейнера (массив или объект) и его размер (длину массива или количество ключей в объекте) - -`Meta` имеет следующую структуру: -``` -+-----------------------+--------------+ -| Container type, 5 бит | Size, 27 бит | -+-----------------------+--------------+ -``` - -- `Container type` - - `0` если `Meta` описывает массив - - `1` если `Meta` описывает объект - - `2` если `Meta` описывает top-level скалярное значение (см раздел "Сериализация") - - Остальные типы зарезервированы на будущее -- `Size`. В зависимости от типа: - - Количество элементов в массиве (для массивов) - - Количество ключей в объекте (для объектов) - - Для остальных типов не определено - -#### Массивы - -Массивы хранятся как последовательность `Entry` для каждого элемента массива. - -Массивы имеют следующую структуру: -``` -+------+---------+-----+------------+ -| Meta | Entry 1 | ... | Entry Size | -+------+---------+-----+------------+ -``` - -- `Meta`. Хранит количество элементов в массиве `Size`, имеет `Container type` равный `0` или `2`. -- Последовательность `Entry`, `Size` штук. `Entry` с номером `i` описывает `i`ый элемент массива. - -#### Объекты - -Объект хранится как массив ключей, сопровождаемый массивом значений. Пары ключ-значение (где ключ берется из первого массива, а значение из второго) отсортированы по ключу в возрастающем лексикографическом порядке - -Объекты имеют следующую структуру: -``` -+------+------------+-----+---------------+---------+-----+------------+ -| Meta | KeyEntry 1 | ... 
| KeyEntry Size | Entry 1 | ... | Entry Size | -+------+------------+-----+---------------+---------+-----+------------+ -``` - -- `Meta`. Хранит количество пар ключ-значение в объекте `Size`, имеет `Container type` равный `1`. -- Последовательность `KeyEntry`, `Size` штук. Это `KeyEntry` для ключа из каждой пары ключ-значение в объекте -- Последовательность `Entry`, `Size` штук. Это `Entry` для значения из каждой пары ключ-значение в объекте - -### String index - -`String index` - это место хранения всех строк (и ключей объектов и значений) из JSON документа. Все строки внутри `String index` уникальны. - -Каждая строка описывается двумя структурами -- `SEntry` хранит местоположение строки в индексе -- `SData` хранит содержание строки - -`String index` имеет следующую структуру: -``` -+----------------+----------+-----+--------------+---------+-----+-------------+ -| Count, 32 бита | SEntry 1 | ... | SEntry Count | SData 1 | ... | SData Count | -+----------------+----------+-----+--------------+---------+-----+-------------+ -``` - -- `Count`. Это `uint32_t` хранящий количество строк в индексе -- `SEntry`, `Count` штук. `SEntry` для каждой строки в индексе -- `SData`, `Count` штук. `SData` для каждой строки в индексе - -#### SEntry - -`SEntry` - это `uint32_t`, хранящий сдвиг, который указывает на символ сразу после соответствующего `SData` для строки. - -`SEntry` имеет следующую структуру: - -``` -+--------------------+-----------------------+ -| String type, 5 бит | String offset, 27 бит | -+--------------------+-----------------------+ -``` - -- `String type` зарезервирован на будущее -- `String offset` - сдвиг, указывающий на байт сразу после соответствующего `SData` для строки - -#### SData - -`SData` - это содержимое строки, включая символ `\0` в конце - -### Number index - -`Number index` - это место хранения всех чисел из JSON документа. Числа в BinaryJson представлены как double, поэтому это просто последовательность double. - -## Сериализация - -1. 
`Entry` элементов массива записываются в том порядке как они идут в JSON массиве. -2. `KeyEntry` и `Entry` для пар ключ-значение объектов записываются в возрастающем лексикографическом порядке ключей. Если есть несколько одинаковых ключей, берется значение первого из них. -4. Для представления JSON, состоящего из одного top-level скалярного (не массив и не объект) значения записывается массив из одного элемента. При этом в `Meta` устанавливается `Container type` равный `2`. -5. Все строки в `String index` должны быть уникальны и записываться в возрастающем лексикографическом порядке. Если несколько узлов JSON документа содержат равные строки, соответствующие им `Entry` должны указывать на один и тот же `SEntry`. - -## Поиск значений - -### Поиск в массиве по индексу - -Дано: -- Сдвиг `start` на начало структуры `Meta` массива -- Индекс элемента массива `i` - -Найти: Сдвиг на начало `Entry` для элемента массива с индексом `i` - -Способ: `start + sizeof(Meta) + i * sizeof(Entry)` - -Сложность: `O(1)` - -### Поиск в объекте по ключу - -Дано: -- Сдвиг `start` на начало структуры `Meta` массива -- Ключ `key` - -Найти: Сдвиг на начало `Entry` для значения которое соответствует ключу `key` в объекте - -Способ: С помощью бинарного поиска находим в объекте пару ключ-значение для строки `key` - -Сложность: `O(log2(Size) + log2(Total count of strings in JSON))` - -## Идеи - -- Использовать NaN tagging. - В double есть значение NaN. Оно устроено так, что умеет хранить 53 бита информации. - Я предлагаю хранить все Entry как double. -Если значение NaN - читаем эти 53 бита информации, там храним тип ноды, сдвиг если нужен. Поскольку бита теперь 53, можем хранить большие сдвиги, большие JSONы. -Если значение не NaN - это нода с числом. - Данный подход используется в [LuaJIT](http://lua-users.org/lists/lua-l/2009-11/msg00089.html). Статья с [подробностями](https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html). 
-- Использовать perfect hashing для хранения объектов. Сейчас чтобы произвести lookup в JSON объекте по ключу необходимо сделать бинарный поиск в последовательности KeyEntry. Поскольку объекты в BinaryJson не изменяемы, можно было бы применить perfect hashing чтобы сразу вычислить сдвиг по которому находится значение - -## Что нужно обсудить - -- Структуры `Header`, `Entry`, `Meta` и `SEntry` резервируют 27 бит на хранение сдвигов. Это вводит ограничение на длину хранимого JSON значения: `2^27 = 128 Mb`. Мы не уверены достаточно ли это для всех пользовательских кейсов. Возможно, стоит рассмотреть увеличение размера этих структур (например, использовать `uint64_t`). + +### Header + +`Header ` хранит метаинформацию о BinaryJson документе. + +Структура: + +``` ++----------------+-----------------------------+ +| Version, 5 бит | String index offset, 27 бит | ++----------------+-----------------------------+ +``` + +- `Version` - номер версии BinaryJson. Всегда равен `1` +- `String index offset` - сдвиг на начало `String index` + +### Tree + +Дерево JSON документа, где каждый узел представлен структурой `Entry`, `KeyEntry` или `Meta`. + +#### Entry + +`Entry` - это `uint32_t`, представляющий узел в дереве JSON. `Entry` может в зависимости от типа: +- Хранить значение в себе (для простых типов вроде `boolean` и `null`) +- Указывать на другой узел дерева (для массивов и объектов) +- Указывать на элемент в `String index` или `Number index` (для строк и чисел) + +`Entry` имеет следующую структуру: +``` ++-------------------+---------------+ +| Entry type, 5 бит | Value, 27 бит | ++-------------------+---------------+ +``` + +- `Entry type`. Задает тип значения: + - `0` - bool значение `false` + - `1` - bool значение `true` + - `2` - значение `null` + - `3` - строка + - `4` - число + - `5` - массив или объект + - Остальные типы зарезервированы на будущее +- `Value`. 
В зависимости от типа: + - Сдвиг указывающий на начало `SEntry` в `String index` (для строк) + - Сдвиг указывающий на начало числа в `Number index` (для чисел) + - Сдвиг указывающий на начало структуры `Meta` (для массивов и объектов) + - Для остальных типов не определено + +#### KeyEntry + +`KeyEntry` - это `uint32_t`, являющийся сдвигом на начало `SEntry` в `String index`. Указывает на строку которая хранит ключ объекта + +#### Meta + +`Meta` - это `uint32_t` хранящий в себе тип контейнера (массив или объект) и его размер (длину массива или количество ключей в объекте) + +`Meta` имеет следующую структуру: +``` ++-----------------------+--------------+ +| Container type, 5 бит | Size, 27 бит | ++-----------------------+--------------+ +``` + +- `Container type` + - `0` если `Meta` описывает массив + - `1` если `Meta` описывает объект + - `2` если `Meta` описывает top-level скалярное значение (см раздел "Сериализация") + - Остальные типы зарезервированы на будущее +- `Size`. В зависимости от типа: + - Количество элементов в массиве (для массивов) + - Количество ключей в объекте (для объектов) + - Для остальных типов не определено + +#### Массивы + +Массивы хранятся как последовательность `Entry` для каждого элемента массива. + +Массивы имеют следующую структуру: +``` ++------+---------+-----+------------+ +| Meta | Entry 1 | ... | Entry Size | ++------+---------+-----+------------+ +``` + +- `Meta`. Хранит количество элементов в массиве `Size`, имеет `Container type` равный `0` или `2`. +- Последовательность `Entry`, `Size` штук. `Entry` с номером `i` описывает `i`ый элемент массива. + +#### Объекты + +Объект хранится как массив ключей, сопровождаемый массивом значений. Пары ключ-значение (где ключ берется из первого массива, а значение из второго) отсортированы по ключу в возрастающем лексикографическом порядке + +Объекты имеют следующую структуру: +``` ++------+------------+-----+---------------+---------+-----+------------+ +| Meta | KeyEntry 1 | ... 
| KeyEntry Size | Entry 1 | ... | Entry Size | ++------+------------+-----+---------------+---------+-----+------------+ +``` + +- `Meta`. Хранит количество пар ключ-значение в объекте `Size`, имеет `Container type` равный `1`. +- Последовательность `KeyEntry`, `Size` штук. Это `KeyEntry` для ключа из каждой пары ключ-значение в объекте +- Последовательность `Entry`, `Size` штук. Это `Entry` для значения из каждой пары ключ-значение в объекте + +### String index + +`String index` - это место хранения всех строк (и ключей объектов и значений) из JSON документа. Все строки внутри `String index` уникальны. + +Каждая строка описывается двумя структурами +- `SEntry` хранит местоположение строки в индексе +- `SData` хранит содержание строки + +`String index` имеет следующую структуру: +``` ++----------------+----------+-----+--------------+---------+-----+-------------+ +| Count, 32 бита | SEntry 1 | ... | SEntry Count | SData 1 | ... | SData Count | ++----------------+----------+-----+--------------+---------+-----+-------------+ +``` + +- `Count`. Это `uint32_t` хранящий количество строк в индексе +- `SEntry`, `Count` штук. `SEntry` для каждой строки в индексе +- `SData`, `Count` штук. `SData` для каждой строки в индексе + +#### SEntry + +`SEntry` - это `uint32_t`, хранящий сдвиг, который указывает на символ сразу после соответствующего `SData` для строки. + +`SEntry` имеет следующую структуру: + +``` ++--------------------+-----------------------+ +| String type, 5 бит | String offset, 27 бит | ++--------------------+-----------------------+ +``` + +- `String type` зарезервирован на будущее +- `String offset` - сдвиг, указывающий на байт сразу после соответствующего `SData` для строки + +#### SData + +`SData` - это содержимое строки, включая символ `\0` в конце + +### Number index + +`Number index` - это место хранения всех чисел из JSON документа. Числа в BinaryJson представлены как double, поэтому это просто последовательность double. + +## Сериализация + +1. 
`Entry` элементов массива записываются в том порядке, как они идут в JSON массиве. +2. `KeyEntry` и `Entry` для пар ключ-значение объектов записываются в возрастающем лексикографическом порядке ключей. Если есть несколько одинаковых ключей, берется значение первого из них. +3. Для представления JSON, состоящего из одного top-level скалярного (не массив и не объект) значения, записывается массив из одного элемента. При этом в `Meta` устанавливается `Container type` равный `2`. +4. Все строки в `String index` должны быть уникальны и записываться в возрастающем лексикографическом порядке. Если несколько узлов JSON документа содержат равные строки, соответствующие им `Entry` должны указывать на один и тот же `SEntry`. + +## Поиск значений + +### Поиск в массиве по индексу + +Дано: +- Сдвиг `start` на начало структуры `Meta` массива +- Индекс элемента массива `i` + +Найти: Сдвиг на начало `Entry` для элемента массива с индексом `i` + +Способ: `start + sizeof(Meta) + i * sizeof(Entry)` + +Сложность: `O(1)` + +### Поиск в объекте по ключу + +Дано: +- Сдвиг `start` на начало структуры `Meta` объекта +- Ключ `key` + +Найти: Сдвиг на начало `Entry` для значения, которое соответствует ключу `key` в объекте + +Способ: С помощью бинарного поиска находим в объекте пару ключ-значение для строки `key` + +Сложность: `O(log2(Size) + log2(Total count of strings in JSON))` + +## Идеи + +- Использовать NaN tagging. + В double есть значение NaN. Оно устроено так, что умеет хранить 53 бита информации. + Я предлагаю хранить все Entry как double. +Если значение NaN - читаем эти 53 бита информации, там храним тип ноды, сдвиг если нужен. Поскольку бит теперь 53, можем хранить большие сдвиги, большие JSONы. +Если значение не NaN - это нода с числом. + Данный подход используется в [LuaJIT](http://lua-users.org/lists/lua-l/2009-11/msg00089.html). Статья с [подробностями](https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html). 
+- Использовать perfect hashing для хранения объектов. Сейчас, чтобы произвести lookup в JSON объекте по ключу, необходимо сделать бинарный поиск в последовательности KeyEntry. Поскольку объекты в BinaryJson неизменяемы, можно было бы применить perfect hashing, чтобы сразу вычислить сдвиг, по которому находится значение + +## Что нужно обсудить + +- Структуры `Header`, `Entry`, `Meta` и `SEntry` резервируют 27 бит на хранение сдвигов. Это вводит ограничение на длину хранимого JSON значения: `2^27 байт = 128 MiB`. Мы не уверены, достаточно ли это для всех пользовательских кейсов. Возможно, стоит рассмотреть увеличение размера этих структур (например, использовать `uint64_t`). - Структуры `Entry`, `Meta` и `SEntry` резервируют по 5 бит на хранение типа, что даёт нам 32 варианта типов. Мы не уверены, будет ли этого достаточно для наших целей, учитывая, что некоторые типы могут иметь параметры (например, что-то вроде Decimal). С учетом этого может не хватить расширения структур даже до `uint64_t`. Решением может быть хранить дополнительные `Entry` для некоторых типов, которые будут содержать необходимое описание. К сожалению, сейчас так сделать не получится, так как формат полагается на то, что все `Entry` имеют фиксированный размер. Возможно, нужно вводить отдельный индекс для сложных типов. 
diff --git a/ydb/library/binary_json/format.cpp b/ydb/library/binary_json/format.cpp index b0d6a10246..198ab7a42a 100644 --- a/ydb/library/binary_json/format.cpp +++ b/ydb/library/binary_json/format.cpp @@ -1 +1 @@ -#include "format.h" +#include "format.h" diff --git a/ydb/library/binary_json/format.h b/ydb/library/binary_json/format.h index e996303e2e..af70882eb1 100644 --- a/ydb/library/binary_json/format.h +++ b/ydb/library/binary_json/format.h @@ -1,142 +1,142 @@ -#pragma once - -#include <util/generic/buffer.h> - -namespace NKikimr::NBinaryJson { - -constexpr ui32 TYPE_SHIFT = 5; -constexpr ui32 MAX_TYPE = (1 << TYPE_SHIFT) - 1; -constexpr ui32 OFFSET_SHIFT = 27; -constexpr ui32 MAX_OFFSET = (1 << OFFSET_SHIFT) - 1; - -/** - * @brief THeader stores BinaryJson version and offset to the String index - * - * Structure: - * +-----------------+------------------------------+ - * | Version, 5 bits | String index offset, 27 bits | - * +-----------------+------------------------------+ - */ -enum class EVersion { - Draft = 0, - V1 = 1, - MaxVersion = MAX_TYPE, -}; - -constexpr EVersion CURRENT_VERSION = EVersion::V1; - -struct THeader { - THeader() = default; - - THeader(EVersion version, ui32 stringOffset) - : Version(version) - , StringOffset(stringOffset) - { - Y_VERIFY_DEBUG(StringOffset <= MAX_OFFSET); - } - - EVersion Version : 5; - ui32 StringOffset : 27; -}; -static_assert(sizeof(THeader) == sizeof(ui32)); - -/** - * @brief TEntry stores type of BinaryJson node and optional offset to the TSEntry or container - * - * Structure: - * +--------------------+----------------+ - * | Entry type, 5 bits | Value, 27 bits | - * +--------------------+----------------+ - */ -enum class EEntryType { - BoolFalse = 0, - BoolTrue = 1, - Null = 2, - String = 3, - Number = 4, - Container = 5, -}; - -struct TEntry { - TEntry() = default; - - TEntry(EEntryType type, ui32 value = 0) - : Type(type) - , Value(value) - { - Y_VERIFY_DEBUG(value <= MAX_OFFSET); - } - - EEntryType Type : 
5; - ui32 Value : 27; -}; -static_assert(sizeof(TEntry) == sizeof(ui32)); - -/** - * @brief TKeyEntry stores offset to the TSEntry containing object key - */ -using TKeyEntry = ui32; - -/** - * @brief TSEntry stores string type and offset to the string value in String index - * - * Structure: - * +---------------------+------------------------+ - * | String type, 5 bits | String offset, 27 bits | - * +---------------------+------------------------+ - */ -enum class EStringType { - RawNullTerminated = 0, -}; - -struct TSEntry { - TSEntry() = default; - - TSEntry(EStringType type, ui32 value) - : Type(type) - , Value(value) - { - Y_VERIFY_DEBUG(value <= MAX_OFFSET); - } - - EStringType Type : 5; - ui32 Value : 27; -}; -static_assert(sizeof(TSEntry) == sizeof(ui32)); - -/** - * @brief TMeta stores container type and container size. For arrays container size is simply - * array size and for objects it is 2 * (number of key-value pairs) - * - * Structure: - * +------------------------+---------------+ - * | Container type, 5 bits | Size, 27 bits | - * +------------------------+---------------+ - */ -enum class EContainerType { - Array = 0, - Object = 1, - TopLevelScalar = 2, -}; - -struct TMeta { - TMeta() = default; - - TMeta(EContainerType type, ui32 size) - : Type(type) - , Size(size) - { - Y_VERIFY_DEBUG(size <= MAX_OFFSET); - } - - EContainerType Type : 5; - ui32 Size : 27; -}; -static_assert(sizeof(TMeta) == sizeof(ui32)); - -/** - * @brief Buffer to store serialized BinaryJson - */ -using TBinaryJson = TBuffer; - -}
\ No newline at end of file +#pragma once + +#include <util/generic/buffer.h> + +namespace NKikimr::NBinaryJson { + +constexpr ui32 TYPE_SHIFT = 5; +constexpr ui32 MAX_TYPE = (1 << TYPE_SHIFT) - 1; +constexpr ui32 OFFSET_SHIFT = 27; +constexpr ui32 MAX_OFFSET = (1 << OFFSET_SHIFT) - 1; + +/** + * @brief THeader stores BinaryJson version and offset to the String index + * + * Structure: + * +-----------------+------------------------------+ + * | Version, 5 bits | String index offset, 27 bits | + * +-----------------+------------------------------+ + */ +enum class EVersion { + Draft = 0, + V1 = 1, + MaxVersion = MAX_TYPE, +}; + +constexpr EVersion CURRENT_VERSION = EVersion::V1; + +struct THeader { + THeader() = default; + + THeader(EVersion version, ui32 stringOffset) + : Version(version) + , StringOffset(stringOffset) + { + Y_VERIFY_DEBUG(StringOffset <= MAX_OFFSET); + } + + EVersion Version : 5; + ui32 StringOffset : 27; +}; +static_assert(sizeof(THeader) == sizeof(ui32)); + +/** + * @brief TEntry stores type of BinaryJson node and optional offset to the TSEntry or container + * + * Structure: + * +--------------------+----------------+ + * | Entry type, 5 bits | Value, 27 bits | + * +--------------------+----------------+ + */ +enum class EEntryType { + BoolFalse = 0, + BoolTrue = 1, + Null = 2, + String = 3, + Number = 4, + Container = 5, +}; + +struct TEntry { + TEntry() = default; + + TEntry(EEntryType type, ui32 value = 0) + : Type(type) + , Value(value) + { + Y_VERIFY_DEBUG(value <= MAX_OFFSET); + } + + EEntryType Type : 5; + ui32 Value : 27; +}; +static_assert(sizeof(TEntry) == sizeof(ui32)); + +/** + * @brief TKeyEntry stores offset to the TSEntry containing object key + */ +using TKeyEntry = ui32; + +/** + * @brief TSEntry stores string type and offset to the string value in String index + * + * Structure: + * +---------------------+------------------------+ + * | String type, 5 bits | String offset, 27 bits | + * 
+---------------------+------------------------+ + */ +enum class EStringType { + RawNullTerminated = 0, +}; + +struct TSEntry { + TSEntry() = default; + + TSEntry(EStringType type, ui32 value) + : Type(type) + , Value(value) + { + Y_VERIFY_DEBUG(value <= MAX_OFFSET); + } + + EStringType Type : 5; + ui32 Value : 27; +}; +static_assert(sizeof(TSEntry) == sizeof(ui32)); + +/** + * @brief TMeta stores container type and container size. For arrays container size is simply + * array size and for objects it is 2 * (number of key-value pairs) + * + * Structure: + * +------------------------+---------------+ + * | Container type, 5 bits | Size, 27 bits | + * +------------------------+---------------+ + */ +enum class EContainerType { + Array = 0, + Object = 1, + TopLevelScalar = 2, +}; + +struct TMeta { + TMeta() = default; + + TMeta(EContainerType type, ui32 size) + : Type(type) + , Size(size) + { + Y_VERIFY_DEBUG(size <= MAX_OFFSET); + } + + EContainerType Type : 5; + ui32 Size : 27; +}; +static_assert(sizeof(TMeta) == sizeof(ui32)); + +/** + * @brief Buffer to store serialized BinaryJson + */ +using TBinaryJson = TBuffer; + +}
\ No newline at end of file diff --git a/ydb/library/binary_json/read.cpp b/ydb/library/binary_json/read.cpp index d0ee07490f..4613090199 100644 --- a/ydb/library/binary_json/read.cpp +++ b/ydb/library/binary_json/read.cpp @@ -1,572 +1,572 @@ -#include "read.h" - -#include <library/cpp/json/json_writer.h> - -#include <util/stream/str.h> -#include <util/generic/vector.h> -#include <util/string/builder.h> - -#include <cmath> - -namespace NKikimr::NBinaryJson { - -using namespace NUdf; -using namespace NYql::NDom; -using namespace NJson; - -TEntryCursor::TEntryCursor(const TBinaryJsonReaderPtr reader, TEntry entry) - : Reader(reader) - , Entry(entry) -{ -} - -EEntryType TEntryCursor::GetType() const { - return Entry.Type; -} - -TContainerCursor TEntryCursor::GetContainer() const { - Y_VERIFY_DEBUG(Entry.Type == EEntryType::Container, "Expected container type"); - return TContainerCursor(Reader, Entry.Value); -} - -TStringBuf TEntryCursor::GetString() const { - Y_VERIFY_DEBUG(Entry.Type == EEntryType::String, "Expected string type"); - return Reader->ReadString(Entry.Value); -} - -double TEntryCursor::GetNumber() const { - Y_VERIFY_DEBUG(Entry.Type == EEntryType::Number, "Expected number type"); - return Reader->ReadNumber(Entry.Value); -} - -TArrayIterator::TArrayIterator(const TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count) - : Reader(reader) - , Offset(startOffset) -{ - EndOffset = Offset + count * sizeof(TEntry); -} - -TEntryCursor TArrayIterator::Next() { - Y_VERIFY_DEBUG(HasNext()); - TEntryCursor element(Reader, Reader->ReadEntry(Offset)); - Offset += sizeof(TEntry); - return element; -} - -bool TArrayIterator::HasNext() const { - return Offset < EndOffset; -} - -TObjectIterator::TObjectIterator(const TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count) - : Reader(reader) -{ - KeyOffset = startOffset; - ValueOffset = KeyOffset + count * sizeof(TKeyEntry); - ValueEndOffset = ValueOffset + count * sizeof(TEntry); -} - -std::pair<TEntryCursor, 
TEntryCursor> TObjectIterator::Next() { - Y_VERIFY_DEBUG(HasNext()); - // Here we create fake Entry to return Entry cursor - const auto stringOffset = static_cast<ui32>(Reader->ReadKeyEntry(KeyOffset)); - TEntryCursor key(Reader, TEntry(EEntryType::String, stringOffset)); - TEntryCursor value(Reader, Reader->ReadEntry(ValueOffset)); - KeyOffset += sizeof(TKeyEntry); - ValueOffset += sizeof(TEntry); - return std::make_pair(std::move(key), std::move(value)); -} - -bool TObjectIterator::HasNext() const { - return ValueOffset < ValueEndOffset; -} - -TContainerCursor::TContainerCursor(const TBinaryJsonReaderPtr reader, ui32 startOffset) - : Reader(reader) - , StartOffset(startOffset) -{ - Meta = Reader->ReadMeta(StartOffset); - StartOffset += sizeof(Meta); -} - -EContainerType TContainerCursor::GetType() const { - return Meta.Type; -} - -ui32 TContainerCursor::GetSize() const { - return Meta.Size; -} - -TEntryCursor TContainerCursor::GetElement(ui32 index) const { - Y_VERIFY_DEBUG(Meta.Type == EContainerType::Array || Meta.Type == EContainerType::TopLevelScalar, "Expected array"); - Y_VERIFY_DEBUG(index < GetSize(), "Invalid index"); - const ui32 offset = StartOffset + index * sizeof(TEntry); - return TEntryCursor(Reader, Reader->ReadEntry(offset)); -} - -TArrayIterator TContainerCursor::GetArrayIterator() const { - Y_VERIFY_DEBUG(Meta.Type == EContainerType::Array || Meta.Type == EContainerType::TopLevelScalar, "Expected array"); - return TArrayIterator(Reader, StartOffset, Meta.Size); -} - -TMaybe<TEntryCursor> TContainerCursor::Lookup(const TStringBuf key) const { - if (Meta.Size == 0) { - return Nothing(); - } - - i32 left = 0; - i32 right = Meta.Size - 1; - while (left <= right) { - const i32 middle = (left + right) / 2; - const ui32 keyEntryOffset = StartOffset + middle * sizeof(TKeyEntry); - const auto keyStringOffset = Reader->ReadKeyEntry(keyEntryOffset); - - const int compare = Reader->ReadString(keyStringOffset).compare(key); - if (compare == 0) { - const 
ui32 entryOffset = StartOffset + Meta.Size * sizeof(TKeyEntry) + middle * sizeof(TEntry); - return TEntryCursor(Reader, Reader->ReadEntry(entryOffset)); - } else if (compare < 0) { - left = middle + 1; - } else { - right = middle - 1; - } - } - return Nothing(); -} - -TObjectIterator TContainerCursor::GetObjectIterator() const { - Y_VERIFY_DEBUG(Meta.Type == EContainerType::Object, "Expected object"); - return TObjectIterator(Reader, StartOffset, Meta.Size); -} - -TBinaryJsonReader::TBinaryJsonReader(const TBinaryJson& buffer) - : TBinaryJsonReader(TStringBuf(buffer.Data(), buffer.Size())) -{ -} - -TBinaryJsonReader::TBinaryJsonReader(TStringBuf buffer) - : Buffer(buffer) -{ - // Header is stored at the beginning of BinaryJson - Header = ReadPOD<THeader>(0); - - Y_ENSURE( - Header.Version == CURRENT_VERSION, - TStringBuilder() << "Version in BinaryJson `" << static_cast<ui64>(Header.Version) << "` " - << "does not match current version `" << static_cast<ui64>(CURRENT_VERSION) << "`" - ); - - // Tree starts right after Header - TreeStart = sizeof(Header); - - // SEntry sequence starts right after count of strings - StringCount = ReadPOD<ui32>(Header.StringOffset); - StringSEntryStart = Header.StringOffset + sizeof(ui32); -} - -TContainerCursor TBinaryJsonReader::GetRootCursor() { - return TContainerCursor(TIntrusivePtr(this), TreeStart); -} - -TMeta TBinaryJsonReader::ReadMeta(ui32 offset) const { - Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); - return ReadPOD<TMeta>(offset); -} - -TEntry TBinaryJsonReader::ReadEntry(ui32 offset) const { - Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); - return ReadPOD<TEntry>(offset); -} - -TKeyEntry TBinaryJsonReader::ReadKeyEntry(ui32 offset) const { - Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); - return ReadPOD<TKeyEntry>(offset); -} - -const 
TStringBuf TBinaryJsonReader::ReadString(ui32 offset) const { - Y_VERIFY_DEBUG(StringSEntryStart <= offset && offset < StringSEntryStart + StringCount * sizeof(TSEntry), "Offset is not inside string index"); - ui32 startOffset = 0; - if (offset == StringSEntryStart) { - startOffset = StringSEntryStart + StringCount * sizeof(TSEntry); - } else { - ui32 previousOffset = offset - sizeof(TSEntry); - const auto previousEntry = ReadPOD<TSEntry>(previousOffset); - startOffset = previousEntry.Value; - } - const auto entry = ReadPOD<TSEntry>(offset); - const auto endOffset = entry.Value - 1; - return TStringBuf(Buffer.Data() + startOffset, endOffset - startOffset); -} - -double TBinaryJsonReader::ReadNumber(ui32 offset) const { - double result; - MemCopy(reinterpret_cast<char*>(&result), Buffer.Data() + offset, sizeof(result)); - return result; -} - -TUnboxedValue ReadElementToJsonDom(const TEntryCursor& cursor, const NUdf::IValueBuilder* valueBuilder) { - switch (cursor.GetType()) { - case EEntryType::BoolFalse: - return MakeBool(false); - case EEntryType::BoolTrue: - return MakeBool(true); - case EEntryType::Null: - return MakeEntity(); - case EEntryType::String: - return MakeString(cursor.GetString(), valueBuilder); - case EEntryType::Number: - return MakeDouble(cursor.GetNumber()); - case EEntryType::Container: - return ReadContainerToJsonDom(cursor.GetContainer(), valueBuilder); - } -} - -TUnboxedValue ReadContainerToJsonDom(const TContainerCursor& cursor, const NUdf::IValueBuilder* valueBuilder) { - switch (cursor.GetType()) { - case EContainerType::TopLevelScalar: { - return ReadElementToJsonDom(cursor.GetElement(0), valueBuilder); - } - case EContainerType::Array: { - TVector<TUnboxedValue> items; - items.reserve(cursor.GetSize()); - - auto it = cursor.GetArrayIterator(); - while (it.HasNext()) { - const auto element = ReadElementToJsonDom(it.Next(), valueBuilder); - items.push_back(element); - } - return MakeList(items.data(), items.size(), valueBuilder); - } - 
case EContainerType::Object: { - TVector<TPair> items; - items.reserve(cursor.GetSize()); - - auto it = cursor.GetObjectIterator(); - while (it.HasNext()) { - const auto [sourceKey, sourceValue] = it.Next(); - auto key = ReadElementToJsonDom(sourceKey, valueBuilder); - auto value = ReadElementToJsonDom(sourceValue, valueBuilder); - items.emplace_back(std::move(key), std::move(value)); - } - return MakeDict(items.data(), items.size()); - } - } -} - -TUnboxedValue ReadToJsonDom(const TBinaryJson& binaryJson, const NUdf::IValueBuilder* valueBuilder) { - return ReadToJsonDom(TStringBuf(binaryJson.Data(), binaryJson.Size()), valueBuilder); -} - -TUnboxedValue ReadToJsonDom(TStringBuf binaryJson, const NUdf::IValueBuilder* valueBuilder) { - auto reader = TBinaryJsonReader::Make(binaryJson); - return ReadContainerToJsonDom(reader->GetRootCursor(), valueBuilder); -} - -namespace { - -void ReadContainerToJson(const TContainerCursor& cursor, TJsonWriter& writer); - -void ReadElementToJson(const TEntryCursor& cursor, TJsonWriter& writer) { - switch (cursor.GetType()) { - case EEntryType::BoolFalse: - writer.Write(false); - break; - case EEntryType::BoolTrue: - writer.Write(true); - break; - case EEntryType::Null: - writer.WriteNull(); - break; - case EEntryType::String: - writer.Write(cursor.GetString()); - break; - case EEntryType::Number: - writer.Write(cursor.GetNumber()); - break; - case EEntryType::Container: - ReadContainerToJson(cursor.GetContainer(), writer); - break; - } -} - -void ReadContainerToJson(const TContainerCursor& cursor, TJsonWriter& writer) { - switch (cursor.GetType()) { - case EContainerType::TopLevelScalar: { - ReadElementToJson(cursor.GetElement(0), writer); - break; - } - case EContainerType::Array: { - writer.OpenArray(); - auto it = cursor.GetArrayIterator(); - while (it.HasNext()) { - ReadElementToJson(it.Next(), writer); - } - writer.CloseArray(); - break; - } - case EContainerType::Object: { - writer.OpenMap(); - auto it = 
cursor.GetObjectIterator(); - while (it.HasNext()) { - const auto [key, value] = it.Next(); - writer.WriteKey(key.GetString()); - ReadElementToJson(value, writer); - } - writer.CloseMap(); - break; - } - } -} - -} - -TString SerializeToJson(const TBinaryJson& binaryJson) { - return SerializeToJson(TStringBuf(binaryJson.Data(), binaryJson.Size())); -} - -TString SerializeToJson(TStringBuf binaryJson) { - auto reader = TBinaryJsonReader::Make(binaryJson); - TStringStream output; - TJsonWriter writer(&output, /* formatOutput */ false); - ReadContainerToJson(reader->GetRootCursor(), writer); - writer.Flush(); - return output.Str(); -} - -namespace { - -struct TPODReader { - TPODReader(TStringBuf buffer) - : TPODReader(buffer, 0, buffer.Size()) - { - } - - TPODReader(TStringBuf buffer, ui32 start, ui32 end) - : Buffer(buffer) - , Pos(start) - , End(end) - { - Y_VERIFY_DEBUG(Pos <= End && End <= Buffer.Size()); - } - - template <typename T> - TMaybe<T> Read() { - static_assert(std::is_pod_v<T>, "TPODReader can read only POD values"); - if (Pos + sizeof(T) > End) { - return Nothing(); - } - TMaybe<T> result{ReadUnaligned<T>(Buffer.Data() + Pos)}; - Pos += sizeof(T); - return result; - } - - template <typename T> - void Skip(ui32 count) { - static_assert(std::is_pod_v<T>, "TPODReader can read only POD values"); - Pos += sizeof(T) * count; - } - - TStringBuf Buffer; - ui32 Pos; - ui32 End; -}; - -struct TBinaryJsonValidator { - TBinaryJsonValidator(TStringBuf buffer) - : Buffer(buffer) - { - } - - TMaybe<TStringBuf> ValidateWithError() && { - // Validate Header - TPODReader reader(Buffer); - const auto header = reader.Read<THeader>(); - if (!header.Defined()) { +#include "read.h" + +#include <library/cpp/json/json_writer.h> + +#include <util/stream/str.h> +#include <util/generic/vector.h> +#include <util/string/builder.h> + +#include <cmath> + +namespace NKikimr::NBinaryJson { + +using namespace NUdf; +using namespace NYql::NDom; +using namespace NJson; + 
+TEntryCursor::TEntryCursor(const TBinaryJsonReaderPtr reader, TEntry entry) + : Reader(reader) + , Entry(entry) +{ +} + +EEntryType TEntryCursor::GetType() const { + return Entry.Type; +} + +TContainerCursor TEntryCursor::GetContainer() const { + Y_VERIFY_DEBUG(Entry.Type == EEntryType::Container, "Expected container type"); + return TContainerCursor(Reader, Entry.Value); +} + +TStringBuf TEntryCursor::GetString() const { + Y_VERIFY_DEBUG(Entry.Type == EEntryType::String, "Expected string type"); + return Reader->ReadString(Entry.Value); +} + +double TEntryCursor::GetNumber() const { + Y_VERIFY_DEBUG(Entry.Type == EEntryType::Number, "Expected number type"); + return Reader->ReadNumber(Entry.Value); +} + +TArrayIterator::TArrayIterator(const TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count) + : Reader(reader) + , Offset(startOffset) +{ + EndOffset = Offset + count * sizeof(TEntry); +} + +TEntryCursor TArrayIterator::Next() { + Y_VERIFY_DEBUG(HasNext()); + TEntryCursor element(Reader, Reader->ReadEntry(Offset)); + Offset += sizeof(TEntry); + return element; +} + +bool TArrayIterator::HasNext() const { + return Offset < EndOffset; +} + +TObjectIterator::TObjectIterator(const TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count) + : Reader(reader) +{ + KeyOffset = startOffset; + ValueOffset = KeyOffset + count * sizeof(TKeyEntry); + ValueEndOffset = ValueOffset + count * sizeof(TEntry); +} + +std::pair<TEntryCursor, TEntryCursor> TObjectIterator::Next() { + Y_VERIFY_DEBUG(HasNext()); + // Here we create fake Entry to return Entry cursor + const auto stringOffset = static_cast<ui32>(Reader->ReadKeyEntry(KeyOffset)); + TEntryCursor key(Reader, TEntry(EEntryType::String, stringOffset)); + TEntryCursor value(Reader, Reader->ReadEntry(ValueOffset)); + KeyOffset += sizeof(TKeyEntry); + ValueOffset += sizeof(TEntry); + return std::make_pair(std::move(key), std::move(value)); +} + +bool TObjectIterator::HasNext() const { + return ValueOffset < ValueEndOffset; 
+} + +TContainerCursor::TContainerCursor(const TBinaryJsonReaderPtr reader, ui32 startOffset) + : Reader(reader) + , StartOffset(startOffset) +{ + Meta = Reader->ReadMeta(StartOffset); + StartOffset += sizeof(Meta); +} + +EContainerType TContainerCursor::GetType() const { + return Meta.Type; +} + +ui32 TContainerCursor::GetSize() const { + return Meta.Size; +} + +TEntryCursor TContainerCursor::GetElement(ui32 index) const { + Y_VERIFY_DEBUG(Meta.Type == EContainerType::Array || Meta.Type == EContainerType::TopLevelScalar, "Expected array"); + Y_VERIFY_DEBUG(index < GetSize(), "Invalid index"); + const ui32 offset = StartOffset + index * sizeof(TEntry); + return TEntryCursor(Reader, Reader->ReadEntry(offset)); +} + +TArrayIterator TContainerCursor::GetArrayIterator() const { + Y_VERIFY_DEBUG(Meta.Type == EContainerType::Array || Meta.Type == EContainerType::TopLevelScalar, "Expected array"); + return TArrayIterator(Reader, StartOffset, Meta.Size); +} + +TMaybe<TEntryCursor> TContainerCursor::Lookup(const TStringBuf key) const { + if (Meta.Size == 0) { + return Nothing(); + } + + i32 left = 0; + i32 right = Meta.Size - 1; + while (left <= right) { + const i32 middle = (left + right) / 2; + const ui32 keyEntryOffset = StartOffset + middle * sizeof(TKeyEntry); + const auto keyStringOffset = Reader->ReadKeyEntry(keyEntryOffset); + + const int compare = Reader->ReadString(keyStringOffset).compare(key); + if (compare == 0) { + const ui32 entryOffset = StartOffset + Meta.Size * sizeof(TKeyEntry) + middle * sizeof(TEntry); + return TEntryCursor(Reader, Reader->ReadEntry(entryOffset)); + } else if (compare < 0) { + left = middle + 1; + } else { + right = middle - 1; + } + } + return Nothing(); +} + +TObjectIterator TContainerCursor::GetObjectIterator() const { + Y_VERIFY_DEBUG(Meta.Type == EContainerType::Object, "Expected object"); + return TObjectIterator(Reader, StartOffset, Meta.Size); +} + +TBinaryJsonReader::TBinaryJsonReader(const TBinaryJson& buffer) + : 
TBinaryJsonReader(TStringBuf(buffer.Data(), buffer.Size())) +{ +} + +TBinaryJsonReader::TBinaryJsonReader(TStringBuf buffer) + : Buffer(buffer) +{ + // Header is stored at the beginning of BinaryJson + Header = ReadPOD<THeader>(0); + + Y_ENSURE( + Header.Version == CURRENT_VERSION, + TStringBuilder() << "Version in BinaryJson `" << static_cast<ui64>(Header.Version) << "` " + << "does not match current version `" << static_cast<ui64>(CURRENT_VERSION) << "`" + ); + + // Tree starts right after Header + TreeStart = sizeof(Header); + + // SEntry sequence starts right after count of strings + StringCount = ReadPOD<ui32>(Header.StringOffset); + StringSEntryStart = Header.StringOffset + sizeof(ui32); +} + +TContainerCursor TBinaryJsonReader::GetRootCursor() { + return TContainerCursor(TIntrusivePtr(this), TreeStart); +} + +TMeta TBinaryJsonReader::ReadMeta(ui32 offset) const { + Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); + return ReadPOD<TMeta>(offset); +} + +TEntry TBinaryJsonReader::ReadEntry(ui32 offset) const { + Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); + return ReadPOD<TEntry>(offset); +} + +TKeyEntry TBinaryJsonReader::ReadKeyEntry(ui32 offset) const { + Y_VERIFY_DEBUG(TreeStart <= offset && offset < Header.StringOffset, "Offset is not inside Tree section"); + return ReadPOD<TKeyEntry>(offset); +} + +const TStringBuf TBinaryJsonReader::ReadString(ui32 offset) const { + Y_VERIFY_DEBUG(StringSEntryStart <= offset && offset < StringSEntryStart + StringCount * sizeof(TSEntry), "Offset is not inside string index"); + ui32 startOffset = 0; + if (offset == StringSEntryStart) { + startOffset = StringSEntryStart + StringCount * sizeof(TSEntry); + } else { + ui32 previousOffset = offset - sizeof(TSEntry); + const auto previousEntry = ReadPOD<TSEntry>(previousOffset); + startOffset = previousEntry.Value; + } + const auto entry = 
ReadPOD<TSEntry>(offset); + const auto endOffset = entry.Value - 1; + return TStringBuf(Buffer.Data() + startOffset, endOffset - startOffset); +} + +double TBinaryJsonReader::ReadNumber(ui32 offset) const { + double result; + MemCopy(reinterpret_cast<char*>(&result), Buffer.Data() + offset, sizeof(result)); + return result; +} + +TUnboxedValue ReadElementToJsonDom(const TEntryCursor& cursor, const NUdf::IValueBuilder* valueBuilder) { + switch (cursor.GetType()) { + case EEntryType::BoolFalse: + return MakeBool(false); + case EEntryType::BoolTrue: + return MakeBool(true); + case EEntryType::Null: + return MakeEntity(); + case EEntryType::String: + return MakeString(cursor.GetString(), valueBuilder); + case EEntryType::Number: + return MakeDouble(cursor.GetNumber()); + case EEntryType::Container: + return ReadContainerToJsonDom(cursor.GetContainer(), valueBuilder); + } +} + +TUnboxedValue ReadContainerToJsonDom(const TContainerCursor& cursor, const NUdf::IValueBuilder* valueBuilder) { + switch (cursor.GetType()) { + case EContainerType::TopLevelScalar: { + return ReadElementToJsonDom(cursor.GetElement(0), valueBuilder); + } + case EContainerType::Array: { + TVector<TUnboxedValue> items; + items.reserve(cursor.GetSize()); + + auto it = cursor.GetArrayIterator(); + while (it.HasNext()) { + const auto element = ReadElementToJsonDom(it.Next(), valueBuilder); + items.push_back(element); + } + return MakeList(items.data(), items.size(), valueBuilder); + } + case EContainerType::Object: { + TVector<TPair> items; + items.reserve(cursor.GetSize()); + + auto it = cursor.GetObjectIterator(); + while (it.HasNext()) { + const auto [sourceKey, sourceValue] = it.Next(); + auto key = ReadElementToJsonDom(sourceKey, valueBuilder); + auto value = ReadElementToJsonDom(sourceValue, valueBuilder); + items.emplace_back(std::move(key), std::move(value)); + } + return MakeDict(items.data(), items.size()); + } + } +} + +TUnboxedValue ReadToJsonDom(const TBinaryJson& binaryJson, const 
NUdf::IValueBuilder* valueBuilder) { + return ReadToJsonDom(TStringBuf(binaryJson.Data(), binaryJson.Size()), valueBuilder); +} + +TUnboxedValue ReadToJsonDom(TStringBuf binaryJson, const NUdf::IValueBuilder* valueBuilder) { + auto reader = TBinaryJsonReader::Make(binaryJson); + return ReadContainerToJsonDom(reader->GetRootCursor(), valueBuilder); +} + +namespace { + +void ReadContainerToJson(const TContainerCursor& cursor, TJsonWriter& writer); + +void ReadElementToJson(const TEntryCursor& cursor, TJsonWriter& writer) { + switch (cursor.GetType()) { + case EEntryType::BoolFalse: + writer.Write(false); + break; + case EEntryType::BoolTrue: + writer.Write(true); + break; + case EEntryType::Null: + writer.WriteNull(); + break; + case EEntryType::String: + writer.Write(cursor.GetString()); + break; + case EEntryType::Number: + writer.Write(cursor.GetNumber()); + break; + case EEntryType::Container: + ReadContainerToJson(cursor.GetContainer(), writer); + break; + } +} + +void ReadContainerToJson(const TContainerCursor& cursor, TJsonWriter& writer) { + switch (cursor.GetType()) { + case EContainerType::TopLevelScalar: { + ReadElementToJson(cursor.GetElement(0), writer); + break; + } + case EContainerType::Array: { + writer.OpenArray(); + auto it = cursor.GetArrayIterator(); + while (it.HasNext()) { + ReadElementToJson(it.Next(), writer); + } + writer.CloseArray(); + break; + } + case EContainerType::Object: { + writer.OpenMap(); + auto it = cursor.GetObjectIterator(); + while (it.HasNext()) { + const auto [key, value] = it.Next(); + writer.WriteKey(key.GetString()); + ReadElementToJson(value, writer); + } + writer.CloseMap(); + break; + } + } +} + +} + +TString SerializeToJson(const TBinaryJson& binaryJson) { + return SerializeToJson(TStringBuf(binaryJson.Data(), binaryJson.Size())); +} + +TString SerializeToJson(TStringBuf binaryJson) { + auto reader = TBinaryJsonReader::Make(binaryJson); + TStringStream output; + TJsonWriter writer(&output, /* formatOutput */ false); 
+ ReadContainerToJson(reader->GetRootCursor(), writer); + writer.Flush(); + return output.Str(); +} + +namespace { + +struct TPODReader { + TPODReader(TStringBuf buffer) + : TPODReader(buffer, 0, buffer.Size()) + { + } + + TPODReader(TStringBuf buffer, ui32 start, ui32 end) + : Buffer(buffer) + , Pos(start) + , End(end) + { + Y_VERIFY_DEBUG(Pos <= End && End <= Buffer.Size()); + } + + template <typename T> + TMaybe<T> Read() { + static_assert(std::is_pod_v<T>, "TPODReader can read only POD values"); + if (Pos + sizeof(T) > End) { + return Nothing(); + } + TMaybe<T> result{ReadUnaligned<T>(Buffer.Data() + Pos)}; + Pos += sizeof(T); + return result; + } + + template <typename T> + void Skip(ui32 count) { + static_assert(std::is_pod_v<T>, "TPODReader can read only POD values"); + Pos += sizeof(T) * count; + } + + TStringBuf Buffer; + ui32 Pos; + ui32 End; +}; + +struct TBinaryJsonValidator { + TBinaryJsonValidator(TStringBuf buffer) + : Buffer(buffer) + { + } + + TMaybe<TStringBuf> ValidateWithError() && { + // Validate Header + TPODReader reader(Buffer); + const auto header = reader.Read<THeader>(); + if (!header.Defined()) { return "Missing header"sv; - } - if (header->Version != CURRENT_VERSION) { + } + if (header->Version != CURRENT_VERSION) { return "Version does not match current"sv; - } - if (header->Version > EVersion::MaxVersion) { + } + if (header->Version > EVersion::MaxVersion) { return "Invalid version"sv; - } - if (header->StringOffset >= Buffer.Size()) { + } + if (header->StringOffset >= Buffer.Size()) { return "String index offset points outside of buffer"sv; - } - StringIndexStart = header->StringOffset; - - // Validate String index - TPODReader stringReader(Buffer, /* start */ StringIndexStart, /* end */ Buffer.Size()); - const auto stringCount = stringReader.Read<ui32>(); - if (!stringCount.Defined()) { + } + StringIndexStart = header->StringOffset; + + // Validate String index + TPODReader stringReader(Buffer, /* start */ StringIndexStart, /* end 
*/ Buffer.Size()); + const auto stringCount = stringReader.Read<ui32>(); + if (!stringCount.Defined()) { return "Missing string index size"sv; - } - StringEntryStart = StringIndexStart + sizeof(ui32); - StringDataStart = StringEntryStart + (*stringCount) * sizeof(TSEntry); - - ui32 totalLength = 0; - ui32 lastStringOffset = StringDataStart; - for (ui32 i = 0; i < *stringCount; i++) { - const auto entry = stringReader.Read<TSEntry>(); - if (!entry.Defined()) { + } + StringEntryStart = StringIndexStart + sizeof(ui32); + StringDataStart = StringEntryStart + (*stringCount) * sizeof(TSEntry); + + ui32 totalLength = 0; + ui32 lastStringOffset = StringDataStart; + for (ui32 i = 0; i < *stringCount; i++) { + const auto entry = stringReader.Read<TSEntry>(); + if (!entry.Defined()) { return "Missing entry in string index"sv; - } - if (entry->Type != EStringType::RawNullTerminated) { + } + if (entry->Type != EStringType::RawNullTerminated) { return "String entry type is invalid"sv; - } - if (lastStringOffset >= entry->Value) { + } + if (lastStringOffset >= entry->Value) { return "String entry offset points to invalid location"sv; - } - totalLength += entry->Value - lastStringOffset; - lastStringOffset = entry->Value; - } - - NumberIndexStart = StringDataStart + totalLength; - if (NumberIndexStart > Buffer.Size()) { + } + totalLength += entry->Value - lastStringOffset; + lastStringOffset = entry->Value; + } + + NumberIndexStart = StringDataStart + totalLength; + if (NumberIndexStart > Buffer.Size()) { return "Total length of strings in String index exceeds Buffer size"sv; - } - - // Validate Number index - if ((Buffer.Size() - NumberIndexStart) % sizeof(double) != 0) { + } + + // Validate Number index + if ((Buffer.Size() - NumberIndexStart) % sizeof(double) != 0) { return "Number index cannot be split into doubles"sv; - } - - TPODReader numberReader(Buffer, /* start */ NumberIndexStart, /* end */ Buffer.Size()); - TMaybe<double> current; - while (current = 
numberReader.Read<double>()) { - if (std::isnan(*current)) { + } + + TPODReader numberReader(Buffer, /* start */ NumberIndexStart, /* end */ Buffer.Size()); + TMaybe<double> current; + while (current = numberReader.Read<double>()) { + if (std::isnan(*current)) { return "Number index element is NaN"sv; - } - if (std::isinf(*current)) { + } + if (std::isinf(*current)) { return "Number index element is infinite"sv; - } - } - - // Validate Tree - return IsValidContainer(reader, /* depth */ 0); - } - -private: - TMaybe<TStringBuf> IsValidStringOffset(ui32 offset) { - if (offset < StringEntryStart || offset >= StringDataStart) { + } + } + + // Validate Tree + return IsValidContainer(reader, /* depth */ 0); + } + +private: + TMaybe<TStringBuf> IsValidStringOffset(ui32 offset) { + if (offset < StringEntryStart || offset >= StringDataStart) { return "String offset is out of String index entries section"sv; - } - if ((offset - StringEntryStart) % sizeof(TSEntry) != 0) { + } + if ((offset - StringEntryStart) % sizeof(TSEntry) != 0) { return "String offset does not point to the start of entry"sv; - } - return Nothing(); - } - - TMaybe<TStringBuf> IsValidEntry(TPODReader& reader, ui32 depth, bool containersAllowed = true) { - const auto entry = reader.Read<TEntry>(); - if (!entry.Defined()) { + } + return Nothing(); + } + + TMaybe<TStringBuf> IsValidEntry(TPODReader& reader, ui32 depth, bool containersAllowed = true) { + const auto entry = reader.Read<TEntry>(); + if (!entry.Defined()) { return "Missing entry"sv; - } - - switch (entry->Type) { - case EEntryType::BoolFalse: - case EEntryType::BoolTrue: - case EEntryType::Null: - // Nothing is stored in value of such entry, nothing to check - break; - case EEntryType::String: - return IsValidStringOffset(entry->Value); - case EEntryType::Number: { - const auto numberOffset = entry->Value; - if (numberOffset < NumberIndexStart || numberOffset >= Buffer.Size()) { + } + + switch (entry->Type) { + case EEntryType::BoolFalse: + case 
EEntryType::BoolTrue: + case EEntryType::Null: + // Nothing is stored in value of such entry, nothing to check + break; + case EEntryType::String: + return IsValidStringOffset(entry->Value); + case EEntryType::Number: { + const auto numberOffset = entry->Value; + if (numberOffset < NumberIndexStart || numberOffset >= Buffer.Size()) { return "Number offset cannot point outside of Number index"sv; - } - if ((numberOffset - NumberIndexStart) % sizeof(double) != 0) { + } + if ((numberOffset - NumberIndexStart) % sizeof(double) != 0) { return "Number offset does not point to the start of number"sv; - } - break; - } - case EEntryType::Container: { - if (!containersAllowed) { + } + break; + } + case EEntryType::Container: { + if (!containersAllowed) { return "This entry cannot be a container"sv; - } - const auto metaOffset = entry->Value; - if (metaOffset < reader.Pos) { + } + const auto metaOffset = entry->Value; + if (metaOffset < reader.Pos) { return "Offset to container cannot point before element"sv; - } - if (metaOffset >= StringIndexStart) { + } + if (metaOffset >= StringIndexStart) { return "Offset to container cannot point outside of Tree section"sv; - } - TPODReader containerReader(reader.Buffer, metaOffset, StringIndexStart); - return IsValidContainer(containerReader, depth + 1); - } - default: + } + TPODReader containerReader(reader.Buffer, metaOffset, StringIndexStart); + return IsValidContainer(containerReader, depth + 1); + } + default: return "Invalid entry type"sv; - } - - return Nothing(); - } - - TMaybe<TStringBuf> IsValidContainer(TPODReader& reader, ui32 depth) { - const auto meta = reader.Read<TMeta>(); - if (!meta.Defined()) { + } + + return Nothing(); + } + + TMaybe<TStringBuf> IsValidContainer(TPODReader& reader, ui32 depth) { + const auto meta = reader.Read<TMeta>(); + if (!meta.Defined()) { return "Missing Meta for container"sv; - } - - switch (meta->Type) { - case EContainerType::TopLevelScalar: { - if (depth > 0) { + } + + switch (meta->Type) 
{ + case EContainerType::TopLevelScalar: { + if (depth > 0) { return "Top level scalar can be located only at the root of BinaryJson tree"sv; - } - - return IsValidEntry(reader, depth, /* containersAllowed */ false); - } - case EContainerType::Array: { - for (ui32 i = 0; i < meta->Size; i++) { - const auto error = IsValidEntry(reader, depth); - if (error.Defined()) { - return error; - } - } - break; - } - case EContainerType::Object: { - TPODReader keyReader(reader); - reader.Skip<TKeyEntry>(meta->Size); - - for (ui32 i = 0; i < meta->Size; i++) { - const auto keyOffset = keyReader.Read<TKeyEntry>(); - if (!keyOffset.Defined()) { + } + + return IsValidEntry(reader, depth, /* containersAllowed */ false); + } + case EContainerType::Array: { + for (ui32 i = 0; i < meta->Size; i++) { + const auto error = IsValidEntry(reader, depth); + if (error.Defined()) { + return error; + } + } + break; + } + case EContainerType::Object: { + TPODReader keyReader(reader); + reader.Skip<TKeyEntry>(meta->Size); + + for (ui32 i = 0; i < meta->Size; i++) { + const auto keyOffset = keyReader.Read<TKeyEntry>(); + if (!keyOffset.Defined()) { return "Cannot read key offset"sv; - } - auto error = IsValidStringOffset(*keyOffset); - if (error.Defined()) { - return error; - } - - error = IsValidEntry(reader, depth); - if (error.Defined()) { - return error; - } - } - break; - } - default: + } + auto error = IsValidStringOffset(*keyOffset); + if (error.Defined()) { + return error; + } + + error = IsValidEntry(reader, depth); + if (error.Defined()) { + return error; + } + } + break; + } + default: return "Invalid container type"sv; - } - - return Nothing(); - } - - ui32 StringIndexStart = 0; - ui32 StringEntryStart = 0; - ui32 StringDataStart = 0; - ui32 NumberIndexStart = 0; - TStringBuf Buffer; -}; - -} - -TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer) { - return TBinaryJsonValidator(buffer).ValidateWithError(); -} - -bool IsValidBinaryJson(TStringBuf buffer) { - return 
!IsValidBinaryJsonWithError(buffer).Defined(); -} - -} + } + + return Nothing(); + } + + ui32 StringIndexStart = 0; + ui32 StringEntryStart = 0; + ui32 StringDataStart = 0; + ui32 NumberIndexStart = 0; + TStringBuf Buffer; +}; + +} + +TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer) { + return TBinaryJsonValidator(buffer).ValidateWithError(); +} + +bool IsValidBinaryJson(TStringBuf buffer) { + return !IsValidBinaryJsonWithError(buffer).Defined(); +} + +} diff --git a/ydb/library/binary_json/read.h b/ydb/library/binary_json/read.h index 091c269ab6..7da43bcd2d 100644 --- a/ydb/library/binary_json/read.h +++ b/ydb/library/binary_json/read.h @@ -1,186 +1,186 @@ -#pragma once - -#include "format.h" - +#pragma once + +#include "format.h" + #include <ydb/library/yql/minikql/dom/node.h> - -#include <util/system/unaligned_mem.h> -#include <util/generic/ptr.h> -#include <util/generic/maybe.h> - -namespace NKikimr::NBinaryJson { - -class TContainerCursor; - -/** - * @brief Reads values inside BinaryJson. `Read...` methods of this class are not intended for direct use. - * Consider using `GetRootCursor` method to get more convenient interface over BinaryJson data - */ -class TBinaryJsonReader : public TSimpleRefCount<TBinaryJsonReader> { -public: - template <typename... Args> - static TIntrusivePtr<TBinaryJsonReader> Make(Args&&... 
args) { - return new TBinaryJsonReader{std::forward<Args>(args)...}; - } - - TContainerCursor GetRootCursor(); - - TMeta ReadMeta(ui32 offset) const; - - TEntry ReadEntry(ui32 offset) const; - - TKeyEntry ReadKeyEntry(ui32 offset) const; - - /** - * @brief Reads string from String index - * - * @param offset Offset to the beginning of TSEntry - */ - const TStringBuf ReadString(ui32 offset) const; - - /** - * @brief Reads number from Number index - * - * @param offset Offset to the beginning of number - */ - double ReadNumber(ui32 offset) const; - -private: - explicit TBinaryJsonReader(const TBinaryJson& buffer); - - explicit TBinaryJsonReader(TStringBuf buffer); - - template <typename T> - T ReadPOD(ui32 offset) const { - static_assert(std::is_pod_v<T>, "Type must be POD"); - Y_VERIFY_DEBUG(offset + sizeof(T) <= Buffer.Size(), "Not enough space in buffer to read value"); - return ReadUnaligned<T>(Buffer.Data() + offset); - } - - TStringBuf Buffer; - THeader Header; - ui32 TreeStart; - ui32 StringSEntryStart; - ui32 StringCount; -}; - -using TBinaryJsonReaderPtr = TIntrusivePtr<TBinaryJsonReader>; - -/** - * @brief Interface to single TEntry inside BinaryJson - */ -class TEntryCursor { -public: - TEntryCursor(TBinaryJsonReaderPtr reader, TEntry entry); - - EEntryType GetType() const; - - TContainerCursor GetContainer() const; - - TStringBuf GetString() const; - - double GetNumber() const; - -private: - TBinaryJsonReaderPtr Reader; - TEntry Entry; -}; - -/** - * @brief Iterator to walk through array elements - */ -class TArrayIterator { -public: - TArrayIterator(TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count); - - TEntryCursor Next(); - - bool HasNext() const; - -private: - TBinaryJsonReaderPtr Reader; - ui32 Offset; - ui32 EndOffset; -}; - -/** - * @brief Iterator to walk through object key-value pairs - */ -class TObjectIterator { -public: - TObjectIterator(TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count); - - std::pair<TEntryCursor, 
TEntryCursor> Next(); - - bool HasNext() const; - -private: - TBinaryJsonReaderPtr Reader; - ui32 KeyOffset; - ui32 ValueOffset; - ui32 ValueEndOffset; -}; - -/** - * @brief Interface to container inside BinaryJson - */ -class TContainerCursor { -public: - TContainerCursor(TBinaryJsonReaderPtr reader, ui32 startOffset); - - EContainerType GetType() const; - - /** - * @brief Get container size. Array length for arrays and count of unique keys for objects - */ - ui32 GetSize() const; - - /** - * @brief Get array element at specified index - */ - TEntryCursor GetElement(ui32 index) const; - - /** - * @brief Get iterator to array elements - */ - TArrayIterator GetArrayIterator() const; - - /** - * @brief Get value corresponding to given key in object - */ - TMaybe<TEntryCursor> Lookup(const TStringBuf key) const; - - /** - * @brief Get iterator to object key-value pairs - */ - TObjectIterator GetObjectIterator() const; - -private: - TBinaryJsonReaderPtr Reader; - ui32 StartOffset; - TMeta Meta; -}; - -NUdf::TUnboxedValue ReadContainerToJsonDom(const TContainerCursor& cursor, const NUdf::IValueBuilder* valueBuilder); - -NUdf::TUnboxedValue ReadElementToJsonDom(const TEntryCursor& cursor, const NUdf::IValueBuilder* valueBuilder); - -/** - * @brief Reads whole BinaryJson into TUnboxedValue using DOM layout from `yql/library/dom` library - */ -NUdf::TUnboxedValue ReadToJsonDom(const TBinaryJson& binaryJson, const NUdf::IValueBuilder* valueBuilder); - -NUdf::TUnboxedValue ReadToJsonDom(TStringBuf binaryJson, const NUdf::IValueBuilder* valueBuilder); - -/** - * @brief Serializes whole BinaryJson into textual JSON - */ -TString SerializeToJson(const TBinaryJson& binaryJson); - -TString SerializeToJson(TStringBuf binaryJson); - -bool IsValidBinaryJson(TStringBuf buffer); - -TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer); - -} + +#include <util/system/unaligned_mem.h> +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> + +namespace 
NKikimr::NBinaryJson { + +class TContainerCursor; + +/** + * @brief Reads values inside BinaryJson. `Read...` methods of this class are not intended for direct use. + * Consider using `GetRootCursor` method to get more convenient interface over BinaryJson data + */ +class TBinaryJsonReader : public TSimpleRefCount<TBinaryJsonReader> { +public: + template <typename... Args> + static TIntrusivePtr<TBinaryJsonReader> Make(Args&&... args) { + return new TBinaryJsonReader{std::forward<Args>(args)...}; + } + + TContainerCursor GetRootCursor(); + + TMeta ReadMeta(ui32 offset) const; + + TEntry ReadEntry(ui32 offset) const; + + TKeyEntry ReadKeyEntry(ui32 offset) const; + + /** + * @brief Reads string from String index + * + * @param offset Offset to the beginning of TSEntry + */ + const TStringBuf ReadString(ui32 offset) const; + + /** + * @brief Reads number from Number index + * + * @param offset Offset to the beginning of number + */ + double ReadNumber(ui32 offset) const; + +private: + explicit TBinaryJsonReader(const TBinaryJson& buffer); + + explicit TBinaryJsonReader(TStringBuf buffer); + + template <typename T> + T ReadPOD(ui32 offset) const { + static_assert(std::is_pod_v<T>, "Type must be POD"); + Y_VERIFY_DEBUG(offset + sizeof(T) <= Buffer.Size(), "Not enough space in buffer to read value"); + return ReadUnaligned<T>(Buffer.Data() + offset); + } + + TStringBuf Buffer; + THeader Header; + ui32 TreeStart; + ui32 StringSEntryStart; + ui32 StringCount; +}; + +using TBinaryJsonReaderPtr = TIntrusivePtr<TBinaryJsonReader>; + +/** + * @brief Interface to single TEntry inside BinaryJson + */ +class TEntryCursor { +public: + TEntryCursor(TBinaryJsonReaderPtr reader, TEntry entry); + + EEntryType GetType() const; + + TContainerCursor GetContainer() const; + + TStringBuf GetString() const; + + double GetNumber() const; + +private: + TBinaryJsonReaderPtr Reader; + TEntry Entry; +}; + +/** + * @brief Iterator to walk through array elements + */ +class TArrayIterator { 
+public: + TArrayIterator(TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count); + + TEntryCursor Next(); + + bool HasNext() const; + +private: + TBinaryJsonReaderPtr Reader; + ui32 Offset; + ui32 EndOffset; +}; + +/** + * @brief Iterator to walk through object key-value pairs + */ +class TObjectIterator { +public: + TObjectIterator(TBinaryJsonReaderPtr reader, ui32 startOffset, ui32 count); + + std::pair<TEntryCursor, TEntryCursor> Next(); + + bool HasNext() const; + +private: + TBinaryJsonReaderPtr Reader; + ui32 KeyOffset; + ui32 ValueOffset; + ui32 ValueEndOffset; +}; + +/** + * @brief Interface to container inside BinaryJson + */ +class TContainerCursor { +public: + TContainerCursor(TBinaryJsonReaderPtr reader, ui32 startOffset); + + EContainerType GetType() const; + + /** + * @brief Get container size. Array length for arrays and count of unique keys for objects + */ + ui32 GetSize() const; + + /** + * @brief Get array element at specified index + */ + TEntryCursor GetElement(ui32 index) const; + + /** + * @brief Get iterator to array elements + */ + TArrayIterator GetArrayIterator() const; + + /** + * @brief Get value corresponding to given key in object + */ + TMaybe<TEntryCursor> Lookup(const TStringBuf key) const; + + /** + * @brief Get iterator to object key-value pairs + */ + TObjectIterator GetObjectIterator() const; + +private: + TBinaryJsonReaderPtr Reader; + ui32 StartOffset; + TMeta Meta; +}; + +NUdf::TUnboxedValue ReadContainerToJsonDom(const TContainerCursor& cursor, const NUdf::IValueBuilder* valueBuilder); + +NUdf::TUnboxedValue ReadElementToJsonDom(const TEntryCursor& cursor, const NUdf::IValueBuilder* valueBuilder); + +/** + * @brief Reads whole BinaryJson into TUnboxedValue using DOM layout from `yql/library/dom` library + */ +NUdf::TUnboxedValue ReadToJsonDom(const TBinaryJson& binaryJson, const NUdf::IValueBuilder* valueBuilder); + +NUdf::TUnboxedValue ReadToJsonDom(TStringBuf binaryJson, const NUdf::IValueBuilder* valueBuilder); + 
+/** + * @brief Serializes whole BinaryJson into textual JSON + */ +TString SerializeToJson(const TBinaryJson& binaryJson); + +TString SerializeToJson(TStringBuf binaryJson); + +bool IsValidBinaryJson(TStringBuf buffer); + +TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer); + +} diff --git a/ydb/library/binary_json/ut/container_ut.cpp b/ydb/library/binary_json/ut/container_ut.cpp index b726079817..6d98d976d0 100644 --- a/ydb/library/binary_json/ut/container_ut.cpp +++ b/ydb/library/binary_json/ut/container_ut.cpp @@ -1,210 +1,210 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> - -using namespace NKikimr::NBinaryJson; - -class TBinaryJsonContainerTest : public TBinaryJsonTestBase { -public: - TBinaryJsonContainerTest() - : TBinaryJsonTestBase() - { - } - - UNIT_TEST_SUITE(TBinaryJsonContainerTest); - UNIT_TEST(TestGetType); - UNIT_TEST(TestGetSize); - UNIT_TEST(TestGetElement); - UNIT_TEST(TestArrayIterator); - UNIT_TEST(TestLookup); - UNIT_TEST(TestObjectIterator); - UNIT_TEST_SUITE_END(); - - void TestGetType() { - const TVector<std::pair<TString, EContainerType>> testCases = { - {"1", EContainerType::TopLevelScalar}, - {"\"string\"", EContainerType::TopLevelScalar}, - {"null", EContainerType::TopLevelScalar}, - {"true", EContainerType::TopLevelScalar}, - {"false", EContainerType::TopLevelScalar}, - {"[]", EContainerType::Array}, - {"[1, 2, 3, 4]", EContainerType::Array}, - {"{}", EContainerType::Object}, - {R"({"key": 1, "another": null})", EContainerType::Object}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - UNIT_ASSERT_VALUES_EQUAL(container.GetType(), testCase.second); - } - } - - void TestGetSize() { - const TVector<std::pair<TString, ui32>> testCases = { - {"[]", 0}, - 
{"{}", 0}, - {R"([1, {}, [], true, false, "string", null])", 7}, - {R"({"key": true, "another_key": 2.34})", 2}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - UNIT_ASSERT_VALUES_EQUAL(container.GetSize(), testCase.second); - } - } - - struct TGetElementTestCase { - TString Json; - ui32 Index; - TString Element; - }; - - void TestGetElement() { - const TVector<TGetElementTestCase> testCases = { - {R"([1, {}, [], true, false, "string", null])", 0, "1"}, - {R"([1, {}, [], true, false, "string", null])", 1, "{}"}, - {R"([1, {}, [], true, false, "string", null])", 2, "[]"}, - {R"([1, {}, [], true, false, "string", null])", 3, "true"}, - {R"([1, {}, [], true, false, "string", null])", 4, "false"}, - {R"([1, {}, [], true, false, "string", null])", 5, "\"string\""}, - {R"([1, {}, [], true, false, "string", null])", 6, "null"}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.Json); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - const auto element = container.GetElement(testCase.Index); - - UNIT_ASSERT_VALUES_EQUAL(EntryToJsonText(element), testCase.Element); - } - } - - struct TArrayIteratorTestCase { - TString Json; - TVector<TString> Result; - }; - - void TestArrayIterator() { - const TVector<TArrayIteratorTestCase> testCases = { - {"[]", {}}, - {"[1, 2, 3, 4, 5]", {"1", "2", "3", "4", "5"}}, - {R"([1, {}, [], true, false, "string", null])", {"1", "{}", "[]", "true", "false", "\"string\"", "null"}}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.Json); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - TVector<TString> result; - auto it = 
container.GetArrayIterator(); - while (it.HasNext()) { - result.push_back(EntryToJsonText(it.Next())); - } - - UNIT_ASSERT_VALUES_EQUAL(testCase.Result.size(), result.size()); - - for (ui32 i = 0; i < result.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(result[i], testCase.Result[i]); - } - } - } - - struct TLookupTestCase { - TString Json; - TString Key; - TMaybe<TString> Result; - }; - - void TestLookup() { - const TVector<TLookupTestCase> testCases = { - {"{}", "key", Nothing()}, - {R"({"_key_": 123})", "key", Nothing()}, - {R"({"key": "another"})", "another", Nothing()}, - - {R"({"key": 123})", "key", {"123"}}, - {R"({"key": "string"})", "key", {"\"string\""}}, - {R"({"key": null})", "key", {"null"}}, - {R"({"key": true})", "key", {"true"}}, - {R"({"key": false})", "key", {"false"}}, - {R"({"key": {}})", "key", {"{}"}}, - {R"({"key": []})", "key", {"[]"}}, - - {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "one", {"1"}}, - {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "two", {"2"}}, - {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "three", {"3"}}, - {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "four", {"4"}}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.Json); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - const auto result = container.Lookup(testCase.Key); - - UNIT_ASSERT_VALUES_EQUAL(result.Defined(), testCase.Result.Defined()); - if (result.Defined()) { - UNIT_ASSERT_VALUES_EQUAL(EntryToJsonText(*result), *testCase.Result); - } - } - } - - struct TObjectIteratorTestCase { - TString Json; - THashMap<TString, TString> Result; - }; - - void TestObjectIterator() { - const TVector<TObjectIteratorTestCase> testCases = { - {"{}", {}}, - {R"({"key": 123})", {{"key", "123"}}}, - {R"({ - "one": 123, - "two": null, - "three": false, - "four": true, - "five": "string", - "six": [], - "seven": {} - })", { - {"one", "123"}, - {"two", 
"null"}, - {"three", "false"}, - {"four", "true"}, - {"five", "\"string\""}, - {"six", "[]"}, - {"seven", "{}"}, - }}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.Json); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - THashMap<TString, TString> result; - auto it = container.GetObjectIterator(); - while (it.HasNext()) { - const auto pair = it.Next(); - result[pair.first.GetString()] = EntryToJsonText(pair.second); - } - - UNIT_ASSERT_VALUES_EQUAL(testCase.Result.size(), result.size()); - - for (const auto it : testCase.Result) { - UNIT_ASSERT(result.contains(it.first)); - UNIT_ASSERT_VALUES_EQUAL(result.at(it.first), it.second); - } - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonContainerTest); + +using namespace NKikimr::NBinaryJson; + +class TBinaryJsonContainerTest : public TBinaryJsonTestBase { +public: + TBinaryJsonContainerTest() + : TBinaryJsonTestBase() + { + } + + UNIT_TEST_SUITE(TBinaryJsonContainerTest); + UNIT_TEST(TestGetType); + UNIT_TEST(TestGetSize); + UNIT_TEST(TestGetElement); + UNIT_TEST(TestArrayIterator); + UNIT_TEST(TestLookup); + UNIT_TEST(TestObjectIterator); + UNIT_TEST_SUITE_END(); + + void TestGetType() { + const TVector<std::pair<TString, EContainerType>> testCases = { + {"1", EContainerType::TopLevelScalar}, + {"\"string\"", EContainerType::TopLevelScalar}, + {"null", EContainerType::TopLevelScalar}, + {"true", EContainerType::TopLevelScalar}, + {"false", EContainerType::TopLevelScalar}, + {"[]", EContainerType::Array}, + {"[1, 2, 3, 4]", EContainerType::Array}, + {"{}", EContainerType::Object}, + {R"({"key": 1, "another": null})", EContainerType::Object}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + 
UNIT_ASSERT_VALUES_EQUAL(container.GetType(), testCase.second); + } + } + + void TestGetSize() { + const TVector<std::pair<TString, ui32>> testCases = { + {"[]", 0}, + {"{}", 0}, + {R"([1, {}, [], true, false, "string", null])", 7}, + {R"({"key": true, "another_key": 2.34})", 2}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + UNIT_ASSERT_VALUES_EQUAL(container.GetSize(), testCase.second); + } + } + + struct TGetElementTestCase { + TString Json; + ui32 Index; + TString Element; + }; + + void TestGetElement() { + const TVector<TGetElementTestCase> testCases = { + {R"([1, {}, [], true, false, "string", null])", 0, "1"}, + {R"([1, {}, [], true, false, "string", null])", 1, "{}"}, + {R"([1, {}, [], true, false, "string", null])", 2, "[]"}, + {R"([1, {}, [], true, false, "string", null])", 3, "true"}, + {R"([1, {}, [], true, false, "string", null])", 4, "false"}, + {R"([1, {}, [], true, false, "string", null])", 5, "\"string\""}, + {R"([1, {}, [], true, false, "string", null])", 6, "null"}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.Json); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + const auto element = container.GetElement(testCase.Index); + + UNIT_ASSERT_VALUES_EQUAL(EntryToJsonText(element), testCase.Element); + } + } + + struct TArrayIteratorTestCase { + TString Json; + TVector<TString> Result; + }; + + void TestArrayIterator() { + const TVector<TArrayIteratorTestCase> testCases = { + {"[]", {}}, + {"[1, 2, 3, 4, 5]", {"1", "2", "3", "4", "5"}}, + {R"([1, {}, [], true, false, "string", null])", {"1", "{}", "[]", "true", "false", "\"string\"", "null"}}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = 
*SerializeToBinaryJson(testCase.Json); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + TVector<TString> result; + auto it = container.GetArrayIterator(); + while (it.HasNext()) { + result.push_back(EntryToJsonText(it.Next())); + } + + UNIT_ASSERT_VALUES_EQUAL(testCase.Result.size(), result.size()); + + for (ui32 i = 0; i < result.size(); i++) { + UNIT_ASSERT_VALUES_EQUAL(result[i], testCase.Result[i]); + } + } + } + + struct TLookupTestCase { + TString Json; + TString Key; + TMaybe<TString> Result; + }; + + void TestLookup() { + const TVector<TLookupTestCase> testCases = { + {"{}", "key", Nothing()}, + {R"({"_key_": 123})", "key", Nothing()}, + {R"({"key": "another"})", "another", Nothing()}, + + {R"({"key": 123})", "key", {"123"}}, + {R"({"key": "string"})", "key", {"\"string\""}}, + {R"({"key": null})", "key", {"null"}}, + {R"({"key": true})", "key", {"true"}}, + {R"({"key": false})", "key", {"false"}}, + {R"({"key": {}})", "key", {"{}"}}, + {R"({"key": []})", "key", {"[]"}}, + + {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "one", {"1"}}, + {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "two", {"2"}}, + {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "three", {"3"}}, + {R"({"one": 1, "two": 2, "three": 3, "four": 4})", "four", {"4"}}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.Json); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + const auto result = container.Lookup(testCase.Key); + + UNIT_ASSERT_VALUES_EQUAL(result.Defined(), testCase.Result.Defined()); + if (result.Defined()) { + UNIT_ASSERT_VALUES_EQUAL(EntryToJsonText(*result), *testCase.Result); + } + } + } + + struct TObjectIteratorTestCase { + TString Json; + THashMap<TString, TString> Result; + }; + + void TestObjectIterator() { + const TVector<TObjectIteratorTestCase> testCases = { + {"{}", {}}, + 
{R"({"key": 123})", {{"key", "123"}}}, + {R"({ + "one": 123, + "two": null, + "three": false, + "four": true, + "five": "string", + "six": [], + "seven": {} + })", { + {"one", "123"}, + {"two", "null"}, + {"three", "false"}, + {"four", "true"}, + {"five", "\"string\""}, + {"six", "[]"}, + {"seven", "{}"}, + }}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.Json); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + THashMap<TString, TString> result; + auto it = container.GetObjectIterator(); + while (it.HasNext()) { + const auto pair = it.Next(); + result[pair.first.GetString()] = EntryToJsonText(pair.second); + } + + UNIT_ASSERT_VALUES_EQUAL(testCase.Result.size(), result.size()); + + for (const auto it : testCase.Result) { + UNIT_ASSERT(result.contains(it.first)); + UNIT_ASSERT_VALUES_EQUAL(result.at(it.first), it.second); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonContainerTest); diff --git a/ydb/library/binary_json/ut/entry_ut.cpp b/ydb/library/binary_json/ut/entry_ut.cpp index c89534e97b..8ce69e6792 100644 --- a/ydb/library/binary_json/ut/entry_ut.cpp +++ b/ydb/library/binary_json/ut/entry_ut.cpp @@ -1,98 +1,98 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> - -using namespace NKikimr::NBinaryJson; - -class TBinaryJsonEntryTest : public TBinaryJsonTestBase { -public: - TBinaryJsonEntryTest() - : TBinaryJsonTestBase() - { - } - - UNIT_TEST_SUITE(TBinaryJsonEntryTest); - UNIT_TEST(TestGetType); - UNIT_TEST(TestGetContainer); - UNIT_TEST(TestGetString); - UNIT_TEST(TestGetNumber); - UNIT_TEST_SUITE_END(); - - void TestGetType() { - const TVector<std::pair<TString, EEntryType>> testCases = { - {"1", EEntryType::Number}, - {"\"string\"", EEntryType::String}, - {"null", EEntryType::Null}, - {"true", EEntryType::BoolTrue}, - {"false", 
EEntryType::BoolFalse}, - {"[[]]", EEntryType::Container}, - {"[[1, 2, 3, 4]]", EEntryType::Container}, - {"[{}]", EEntryType::Container}, - {R"([{"key": 1, "another": null}])", EEntryType::Container}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetType(), testCase.second); - } - } - - void TestGetContainer() { - const TVector<std::pair<TString, TString>> testCases = { - {"[[]]", "[]"}, - {"[[1.2, 3.4, 5.6]]", "[1.2,3.4,5.6]"}, - {"[{}]", "{}"}, - {R"([{"abc": 123, "def": 456}])", R"({"abc":123,"def":456})"}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - const auto innerContainer = container.GetElement(0).GetContainer(); - - UNIT_ASSERT_VALUES_EQUAL(ContainerToJsonText(innerContainer), testCase.second); - } - } - - void TestGetString() { - const TVector<std::pair<TString, TString>> testCases = { - {R"("")", ""}, - {R"("string")", "string"}, - {R"(["string", "another", "string"])", "string"}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetString(), testCase.second); - } - } - - void TestGetNumber() { - const TVector<std::pair<TString, double>> testCases = { - {"0", 0}, - {"0.1234", 0.1234}, - {"1.2345", 1.2345}, - {"-0.12345", -0.12345}, - {"-1.2345", -1.2345}, - {"[1.5, 2, 3, 1.5]", 1.5}, - }; - - for (const auto testCase : testCases) { - const auto binaryJson = *SerializeToBinaryJson(testCase.first); - const auto reader = 
TBinaryJsonReader::Make(binaryJson); - const auto container = reader->GetRootCursor(); - - UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetNumber(), testCase.second); - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonEntryTest); + +using namespace NKikimr::NBinaryJson; + +class TBinaryJsonEntryTest : public TBinaryJsonTestBase { +public: + TBinaryJsonEntryTest() + : TBinaryJsonTestBase() + { + } + + UNIT_TEST_SUITE(TBinaryJsonEntryTest); + UNIT_TEST(TestGetType); + UNIT_TEST(TestGetContainer); + UNIT_TEST(TestGetString); + UNIT_TEST(TestGetNumber); + UNIT_TEST_SUITE_END(); + + void TestGetType() { + const TVector<std::pair<TString, EEntryType>> testCases = { + {"1", EEntryType::Number}, + {"\"string\"", EEntryType::String}, + {"null", EEntryType::Null}, + {"true", EEntryType::BoolTrue}, + {"false", EEntryType::BoolFalse}, + {"[[]]", EEntryType::Container}, + {"[[1, 2, 3, 4]]", EEntryType::Container}, + {"[{}]", EEntryType::Container}, + {R"([{"key": 1, "another": null}])", EEntryType::Container}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetType(), testCase.second); + } + } + + void TestGetContainer() { + const TVector<std::pair<TString, TString>> testCases = { + {"[[]]", "[]"}, + {"[[1.2, 3.4, 5.6]]", "[1.2,3.4,5.6]"}, + {"[{}]", "{}"}, + {R"([{"abc": 123, "def": 456}])", R"({"abc":123,"def":456})"}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + const auto innerContainer = container.GetElement(0).GetContainer(); + + UNIT_ASSERT_VALUES_EQUAL(ContainerToJsonText(innerContainer), testCase.second); + } + } + + void TestGetString() { + const 
TVector<std::pair<TString, TString>> testCases = { + {R"("")", ""}, + {R"("string")", "string"}, + {R"(["string", "another", "string"])", "string"}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetString(), testCase.second); + } + } + + void TestGetNumber() { + const TVector<std::pair<TString, double>> testCases = { + {"0", 0}, + {"0.1234", 0.1234}, + {"1.2345", 1.2345}, + {"-0.12345", -0.12345}, + {"-1.2345", -1.2345}, + {"[1.5, 2, 3, 1.5]", 1.5}, + }; + + for (const auto testCase : testCases) { + const auto binaryJson = *SerializeToBinaryJson(testCase.first); + const auto reader = TBinaryJsonReader::Make(binaryJson); + const auto container = reader->GetRootCursor(); + + UNIT_ASSERT_VALUES_EQUAL(container.GetElement(0).GetNumber(), testCase.second); + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonEntryTest); diff --git a/ydb/library/binary_json/ut/identity_ut.cpp b/ydb/library/binary_json/ut/identity_ut.cpp index 7b4bdcc4a7..e70144e651 100644 --- a/ydb/library/binary_json/ut/identity_ut.cpp +++ b/ydb/library/binary_json/ut/identity_ut.cpp @@ -1,71 +1,71 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> - + #include <ydb/library/yql/minikql/dom/json.h> - -using namespace NKikimr; - -class TBinaryJsonIdentityTest : public TBinaryJsonTestBase { -public: - TBinaryJsonIdentityTest() - : TBinaryJsonTestBase() - { - } - - UNIT_TEST_SUITE(TBinaryJsonIdentityTest); - UNIT_TEST(TestReadToJsonDom); - UNIT_TEST(TestSerializeToJson); - UNIT_TEST(TestSerializeDomToBinaryJson); - UNIT_TEST_SUITE_END(); - - const TVector<TString> TestCases = { - "false", - "true", - "null", - "\"test string\"", - "\"\"", - "1.2345", - "1", - "-23", - "0", - "0.12345", - 
"{}", - "{\"a\":1}", - "[]", - "[1]", - R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", - }; - - void TestReadToJsonDom() { - for (const TStringBuf json : TestCases) { - const auto binaryJson = *NBinaryJson::SerializeToBinaryJson(json); - const auto value = NBinaryJson::ReadToJsonDom(binaryJson, &ValueBuilder); - const auto jsonAfterBinaryJson = NDom::SerializeJsonDom(value); - - UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); - } - } - - void TestSerializeToJson() { - for (const TStringBuf json : TestCases) { - const auto binaryJson = *NBinaryJson::SerializeToBinaryJson(json); - const auto jsonAfterBinaryJson = NBinaryJson::SerializeToJson(binaryJson); - - UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); - } - } - - void TestSerializeDomToBinaryJson() { - for (const TStringBuf json : TestCases) { - const auto dom = NDom::TryParseJsonDom(json, &ValueBuilder); - const auto binaryJson = NBinaryJson::SerializeToBinaryJson(dom); - const auto jsonAfterBinaryJson = NBinaryJson::SerializeToJson(binaryJson); - - UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonIdentityTest); + +using namespace NKikimr; + +class TBinaryJsonIdentityTest : public TBinaryJsonTestBase { +public: + TBinaryJsonIdentityTest() + : TBinaryJsonTestBase() + { + } + + UNIT_TEST_SUITE(TBinaryJsonIdentityTest); + UNIT_TEST(TestReadToJsonDom); + UNIT_TEST(TestSerializeToJson); + UNIT_TEST(TestSerializeDomToBinaryJson); + UNIT_TEST_SUITE_END(); + + const TVector<TString> TestCases = { + "false", + "true", + "null", + "\"test string\"", + "\"\"", + "1.2345", + "1", + "-23", + "0", + "0.12345", + "{}", + "{\"a\":1}", + "[]", + "[1]", + R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", + }; + + void TestReadToJsonDom() { + for (const TStringBuf json : TestCases) { + const auto binaryJson = *NBinaryJson::SerializeToBinaryJson(json); + const auto value = 
NBinaryJson::ReadToJsonDom(binaryJson, &ValueBuilder); + const auto jsonAfterBinaryJson = NDom::SerializeJsonDom(value); + + UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); + } + } + + void TestSerializeToJson() { + for (const TStringBuf json : TestCases) { + const auto binaryJson = *NBinaryJson::SerializeToBinaryJson(json); + const auto jsonAfterBinaryJson = NBinaryJson::SerializeToJson(binaryJson); + + UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); + } + } + + void TestSerializeDomToBinaryJson() { + for (const TStringBuf json : TestCases) { + const auto dom = NDom::TryParseJsonDom(json, &ValueBuilder); + const auto binaryJson = NBinaryJson::SerializeToBinaryJson(dom); + const auto jsonAfterBinaryJson = NBinaryJson::SerializeToJson(binaryJson); + + UNIT_ASSERT_VALUES_EQUAL(json, jsonAfterBinaryJson); + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonIdentityTest); diff --git a/ydb/library/binary_json/ut/test_base.cpp b/ydb/library/binary_json/ut/test_base.cpp index df9947f36d..9ad48fbb43 100644 --- a/ydb/library/binary_json/ut/test_base.cpp +++ b/ydb/library/binary_json/ut/test_base.cpp @@ -1,28 +1,28 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/yql/minikql/dom/json.h> - -using namespace NYql::NDom; - -TBinaryJsonTestBase::TBinaryJsonTestBase() - : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())) - , Env(Alloc) - , MemInfo("Memory") - , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()) - , ValueBuilder(HolderFactory) -{ -} - -TString TBinaryJsonTestBase::EntryToJsonText(const TEntryCursor& cursor) { - if (cursor.GetType() == EEntryType::Container) { - return ContainerToJsonText(cursor.GetContainer()); - } - - TUnboxedValue result = ReadElementToJsonDom(cursor, &ValueBuilder); - return SerializeJsonDom(result); -} - -TString TBinaryJsonTestBase::ContainerToJsonText(const TContainerCursor& cursor) { - TUnboxedValue result = ReadContainerToJsonDom(cursor, &ValueBuilder); - return 
SerializeJsonDom(result); -} + +using namespace NYql::NDom; + +TBinaryJsonTestBase::TBinaryJsonTestBase() + : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())) + , Env(Alloc) + , MemInfo("Memory") + , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()) + , ValueBuilder(HolderFactory) +{ +} + +TString TBinaryJsonTestBase::EntryToJsonText(const TEntryCursor& cursor) { + if (cursor.GetType() == EEntryType::Container) { + return ContainerToJsonText(cursor.GetContainer()); + } + + TUnboxedValue result = ReadElementToJsonDom(cursor, &ValueBuilder); + return SerializeJsonDom(result); +} + +TString TBinaryJsonTestBase::ContainerToJsonText(const TContainerCursor& cursor) { + TUnboxedValue result = ReadContainerToJsonDom(cursor, &ValueBuilder); + return SerializeJsonDom(result); +} diff --git a/ydb/library/binary_json/ut/test_base.h b/ydb/library/binary_json/ut/test_base.h index 838fb33251..6f6deed423 100644 --- a/ydb/library/binary_json/ut/test_base.h +++ b/ydb/library/binary_json/ut/test_base.h @@ -1,9 +1,9 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> #include <ydb/library/yql/minikql/jsonpath/jsonpath.h> #include <ydb/library/yql/minikql/dom/json.h> - + #include <ydb/library/yql/minikql/computation/mkql_value_builder.h> #include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> #include <ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> @@ -11,35 +11,35 @@ #include <ydb/library/yql/minikql/mkql_function_registry.h> #include <ydb/library/yql/minikql/mkql_alloc.h> #include <ydb/library/yql/minikql/mkql_node.h> - + #include <ydb/library/binary_json/read.h> - + #include <library/cpp/json/json_reader.h> #include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/yexception.h> - -using namespace NYql; -using namespace NYql::NDom; -using namespace NYql::NUdf; -using namespace NYql::NJsonPath; -using namespace NJson; -using namespace NKikimr::NMiniKQL; -using 
namespace NKikimr::NBinaryJson; - -class TBinaryJsonTestBase: public TTestBase { -public: - TBinaryJsonTestBase(); - - TString EntryToJsonText(const TEntryCursor& cursor); - - TString ContainerToJsonText(const TContainerCursor& cursor); - -protected: - TIntrusivePtr<IFunctionRegistry> FunctionRegistry; - TScopedAlloc Alloc; - TTypeEnvironment Env; - TMemoryUsageInfo MemInfo; - THolderFactory HolderFactory; - TDefaultValueBuilder ValueBuilder; -}; + +#include <util/generic/yexception.h> + +using namespace NYql; +using namespace NYql::NDom; +using namespace NYql::NUdf; +using namespace NYql::NJsonPath; +using namespace NJson; +using namespace NKikimr::NMiniKQL; +using namespace NKikimr::NBinaryJson; + +class TBinaryJsonTestBase: public TTestBase { +public: + TBinaryJsonTestBase(); + + TString EntryToJsonText(const TEntryCursor& cursor); + + TString ContainerToJsonText(const TContainerCursor& cursor); + +protected: + TIntrusivePtr<IFunctionRegistry> FunctionRegistry; + TScopedAlloc Alloc; + TTypeEnvironment Env; + TMemoryUsageInfo MemInfo; + THolderFactory HolderFactory; + TDefaultValueBuilder ValueBuilder; +}; diff --git a/ydb/library/binary_json/ut/valid_ut.cpp b/ydb/library/binary_json/ut/valid_ut.cpp index 8c67b7f5e1..f92b3c02c1 100644 --- a/ydb/library/binary_json/ut/valid_ut.cpp +++ b/ydb/library/binary_json/ut/valid_ut.cpp @@ -1,73 +1,73 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> #include <ydb/library/binary_json/format.h> - + #include <ydb/library/yql/minikql/dom/json.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <util/random/random.h> - -using namespace NKikimr::NBinaryJson; - -class TBinaryJsonValidnessTest : public TBinaryJsonTestBase { -public: - TBinaryJsonValidnessTest() - : TBinaryJsonTestBase() - { - } - - UNIT_TEST_SUITE(TBinaryJsonValidnessTest); - UNIT_TEST(TestValidness); - UNIT_TEST(TestRandom); - UNIT_TEST(TestVersionCheck); - 
UNIT_TEST_SUITE_END(); - - void TestValidness() { - const TVector<TString> testCases = { - "false", - "true", - "null", - "\"test string\"", - "\"\"", - "1.2345", - "1", - "-23", - "0", - "0.12345", - "{}", - "{\"a\":1}", - "[]", - "[1]", - R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", - }; - - for (const TStringBuf json : testCases) { - const auto binaryJson = *SerializeToBinaryJson(json); - const TStringBuf buffer(binaryJson.Data(), binaryJson.Size()); - const auto error = IsValidBinaryJsonWithError(buffer); - UNIT_ASSERT_C(!error.Defined(), TStringBuilder() << "BinaryJson for '" << json << "' is invalid because of '" << *error << "'"); - } - } - - void TestRandom() { - for (ui32 i = 0; i < 1000000; i++) { - const auto fakeBinaryJson = NUnitTest::RandomString(RandomNumber<size_t>(1000)); - UNIT_ASSERT(!IsValidBinaryJson(fakeBinaryJson)); - } - } - - void TestVersionCheck() { - TBinaryJson binaryJson; - THeader header(EVersion::Draft, 0); - binaryJson.Append(reinterpret_cast<char*>(&header), sizeof(header)); - - UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { - TBinaryJsonReader::Make(binaryJson); - }(), yexception, "does not match current version"); - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonValidnessTest); + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/random/random.h> + +using namespace NKikimr::NBinaryJson; + +class TBinaryJsonValidnessTest : public TBinaryJsonTestBase { +public: + TBinaryJsonValidnessTest() + : TBinaryJsonTestBase() + { + } + + UNIT_TEST_SUITE(TBinaryJsonValidnessTest); + UNIT_TEST(TestValidness); + UNIT_TEST(TestRandom); + UNIT_TEST(TestVersionCheck); + UNIT_TEST_SUITE_END(); + + void TestValidness() { + const TVector<TString> testCases = { + "false", + "true", + "null", + "\"test string\"", + "\"\"", + "1.2345", + "1", + "-23", + "0", + "0.12345", + "{}", + "{\"a\":1}", + "[]", + "[1]", + R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", + }; + + for 
(const TStringBuf json : testCases) { + const auto binaryJson = *SerializeToBinaryJson(json); + const TStringBuf buffer(binaryJson.Data(), binaryJson.Size()); + const auto error = IsValidBinaryJsonWithError(buffer); + UNIT_ASSERT_C(!error.Defined(), TStringBuilder() << "BinaryJson for '" << json << "' is invalid because of '" << *error << "'"); + } + } + + void TestRandom() { + for (ui32 i = 0; i < 1000000; i++) { + const auto fakeBinaryJson = NUnitTest::RandomString(RandomNumber<size_t>(1000)); + UNIT_ASSERT(!IsValidBinaryJson(fakeBinaryJson)); + } + } + + void TestVersionCheck() { + TBinaryJson binaryJson; + THeader header(EVersion::Draft, 0); + binaryJson.Append(reinterpret_cast<char*>(&header), sizeof(header)); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + TBinaryJsonReader::Make(binaryJson); + }(), yexception, "does not match current version"); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonValidnessTest); diff --git a/ydb/library/binary_json/ut/ya.make b/ydb/library/binary_json/ut/ya.make index 1d4cf35311..7d9bbc9df3 100644 --- a/ydb/library/binary_json/ut/ya.make +++ b/ydb/library/binary_json/ut/ya.make @@ -1,15 +1,15 @@ UNITTEST_FOR(ydb/library/binary_json) - -OWNER(g:kikimr) - -SRCS( - container_ut.cpp - identity_ut.cpp - entry_ut.cpp - test_base.cpp - valid_ut.cpp -) - + +OWNER(g:kikimr) + +SRCS( + container_ut.cpp + identity_ut.cpp + entry_ut.cpp + test_base.cpp + valid_ut.cpp +) + IF (SANITIZER_TYPE == "thread" OR WITH_VALGRIND) TIMEOUT(2400) SPLIT_FACTOR(20) @@ -20,7 +20,7 @@ ELSE() SIZE(MEDIUM) ENDIF() -PEERDIR( +PEERDIR( ydb/library/binary_json ydb/library/yql/minikql ydb/library/yql/minikql/computation @@ -28,8 +28,8 @@ PEERDIR( ydb/library/yql/minikql/invoke_builtins ydb/library/yql/public/udf/service/exception_policy ydb/library/yql/core/issue/protos -) - +) + YQL_LAST_ABI_VERSION() -END() +END() diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 0aee36f5d3..570d7ba5b5 100644 --- 
a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -1,570 +1,570 @@ -#include "write.h" - +#include "write.h" + #include <library/cpp/json/json_reader.h> - -#include <util/generic/vector.h> -#include <util/generic/stack.h> -#include <util/generic/set.h> -#include <util/generic/algorithm.h> -#include <util/generic/map.h> - -#include <cmath> - -namespace NKikimr::NBinaryJson { - -/** - * Serialization is done in 2 steps: - * 1. Parse textual JSON into TJsonIndex - * 2. Serialze TJsonIndex into TBinaryJson - * - * During the first step we: - * 1. Intern all strings found in JSON (both keys of objects and string values) - * 2. Intern all numbers found in JSON - * 3. Record JSON structure using sequence of TContainer. During this steps we store - * indices instead of offsets inside TEntry - * - * During the second step we: - * 1. Write Header - * 2. Write all interned strings into String index - * 3. Write all interned numbers into Number index - * 4. Serialize sequence of TContainer into Tree section. During this step we also - * replace all indices inside TEntry with actual offsets - */ - -using namespace NJson; -using namespace NYql::NDom; - -namespace { - -struct TContainer { - TContainer(EContainerType type) - : Type(type) - { - } - - EContainerType Type; - TVector<TEntry> Header; -}; - -/** - * @brief Intermediate representation of textual JSON convenient for serialization into BinaryJson. Each string and number - * is assigned to unique index and stored in THashMap. JSON structure is stored as a sequence of TContainer - * - * Let us consider and example of how we store JSON structure. 
Given JSON: - * ``` - * { - * "array": [1, 2] - * } - * ``` - * - * It will be stored as following sequence of TContainer: - * ``` - * TContainer (Object, size 2) - * TEntry (String, "array") - * TEntry (Container, index 1) - * TContainer (Array, size 2) - * TEntry (Number, 1) - * TEntry (Number, 2) - * ``` - * - * Note that we store containers in a flat manner. Array is not nested inside object container, it is referenced by - * container index instead. This is exactly how containers are stored in serialized BinaryJson (but with offsets instead of indices) - */ -struct TJsonIndex { - ui32 InternKey(const TStringBuf value) { - TotalKeysCount++; - - const auto it = Keys.find(value); - if (it == Keys.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Keys[TString(value)] = currentIndex; - TotalKeyLength += value.length() + 1; - return currentIndex; - } else { - return it->second; - } - } - - ui32 InternString(const TStringBuf value) { - const auto it = Strings.find(value); - if (it == Strings.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Strings[value] = currentIndex; - TotalStringLength += value.length() + 1; - return currentIndex; - } else { - return it->second; - } - } - - ui32 InternNumber(double value) { - const auto it = Numbers.find(value); - if (it == Numbers.end()) { - const ui32 currentIndex = LastFreeNumberIndex++; - Numbers[value] = currentIndex; - return currentIndex; - } else { - return it->second; - } - } - - void AddContainer(EContainerType type) { - Containers.emplace_back(type); - const ui32 index = Containers.size() - 1; - if (!ContainerIndex.empty()) { - // Add new container to parent container - AddEntry(TEntry(EEntryType::Container, index)); - } - ContainerIndex.push(index); - } - - void RemoveContainer() { - ContainerIndex.pop(); - } - - void AddEntry(TEntry entry, bool createTopLevel = false) { - if (createTopLevel && ContainerIndex.empty()) { - AddContainer(EContainerType::TopLevelScalar); - } - 
Containers[ContainerIndex.top()].Header.push_back(entry); - TotalEntriesCount++; - } - - TStack<ui32> ContainerIndex; - TVector<TContainer> Containers; - - TMap<TString, ui32> Keys; - ui32 TotalKeyLength = 0; - ui32 TotalKeysCount = 0; - - THashMap<TString, ui32> Strings; - ui32 LastFreeStringIndex = 0; - ui32 TotalStringLength = 0; - - THashMap<double, ui32> Numbers; - ui32 LastFreeNumberIndex = 0; - - ui32 TotalEntriesCount = 0; -}; - -/** - * @brief Convenient interface to write POD datastructures into buffer - */ -struct TPODWriter { - TBinaryJson& Buffer; - ui32 Offset; - - TPODWriter(TBinaryJson& buffer, ui32 offset) - : Buffer(buffer) - , Offset(offset) - { - } - - template <typename T> - void Write(const T& value) { - Y_VERIFY_DEBUG(Offset + sizeof(T) <= Buffer.Size()); - MemCopy(Buffer.Data() + Offset, reinterpret_cast<const char*>(&value), sizeof(T)); - Offset += sizeof(T); - } - - void Write(const char* source, ui32 length) { - Y_VERIFY_DEBUG(Offset + length <= Buffer.Size()); - MemCopy(Buffer.Data() + Offset, source, length); - Offset += length; - } - - template <typename T> - void Skip(ui32 count) { - Y_VERIFY_DEBUG(Offset + count * sizeof(T) <= Buffer.Size()); - Offset += count * sizeof(T); - } -}; - -/** - * @brief Serializes TJsonIndex into BinaryJson buffer - */ -class TBinaryJsonSerializer { -public: - TBinaryJsonSerializer(TJsonIndex&& json) - : Json(std::move(json)) - { - } - - /** - * @brief Performs actual serialization - * - * BinaryJson structure: - * +--------+------+--------------+--------------+ - * | Header | Tree | String index | Number index | - * +--------+------+--------------+--------------+ - * - * Serialization consists of the following steps: - * 1. Reserve memory for the whole BinaryJson in 1 allocation - * 2. Write Header - * 3. Write String index and record offsets to all strings - * 4. Write Number index and record offsets to all numbers - * 5. 
Write Tree and replace all indices to strings and numbers with actual offsets - */ - TBinaryJson Serialize() && { - // Header consists only of THeader - const ui32 headerSize = sizeof(THeader); - // Each container consists of 1 TMeta and multiple TEntry. Objects also have multiple TKeyEntry - const ui32 keysSize = Json.TotalKeysCount * sizeof(TKeyEntry); - const ui32 entriesSize = (Json.TotalEntriesCount - Json.TotalKeysCount) * sizeof(TEntry); - const ui32 treeSize = Json.Containers.size() * sizeof(TMeta) + entriesSize + keysSize; - - // String index consists of Count and TSEntry/string body pair for each string - const ui32 stringIndexSize = sizeof(ui32) + (Json.Strings.size() + Json.Keys.size()) * sizeof(TSEntry) + (Json.TotalStringLength + Json.TotalKeyLength); - // Number index consists of multiple doubles - const ui32 numberIndexSize = Json.Numbers.size() * sizeof(double); - - // Allocate space for all sections - const ui32 totalSize = headerSize + treeSize + stringIndexSize + numberIndexSize; - Buffer.Advance(totalSize); - - TPODWriter writer(Buffer, 0); - - // Write Header - const ui32 stringIndexStart = headerSize + treeSize; - writer.Write(THeader(CURRENT_VERSION, stringIndexStart)); - - // To get offsets to index elements we first need to write String index and Number index. 
- // We save current position for later use and skip Tree for now - TPODWriter treeWriter(writer); - writer.Skip<char>(treeSize); - - // Write String index and record offsets to all strings written - WriteStringIndex(writer); - - // Write Number index and record offsets to all numbers written - WriteNumberIndex(writer); - - // Write Tree - WriteContainer(treeWriter, 0); - - return std::move(Buffer); - } - -private: - /** - * @brief Writes container and all its children recursively - */ - void WriteContainer(TPODWriter& valueWriter, ui32 index) { - Y_VERIFY_DEBUG(index < Json.Containers.size()); - const auto& container = Json.Containers[index]; - - switch (container.Type) { - case EContainerType::Array: - case EContainerType::TopLevelScalar: - WriteArray(valueWriter, container); - break; - - case EContainerType::Object: - WriteObject(valueWriter, container); - break; - }; - } - - /** - * @brief Writes array and all its children recursively - * - * Structure: - * +------+---------+-----+------------+ - * | Meta | Entry 1 | ... | Entry Size | - * +------+---------+-----+------------+ - */ - void WriteArray(TPODWriter& valueWriter, const TContainer& container) { - const ui32 size = container.Header.size(); - valueWriter.Write(TMeta(container.Type, size)); - - TPODWriter entryWriter(valueWriter); - valueWriter.Skip<TEntry>(size); - - for (const auto entry : container.Header) { - WriteValue(entry, entryWriter, valueWriter); - } - } - - /** - * @brief Writes object and all its children recursively - * - * Structure: - * +------+------------+-----+---------------+---------+-----+------------+ - * | Meta | KeyEntry 1 | ... | KeyEntry Size | Entry 1 | ... 
| Entry Size | - * +------+------------+-----+---------------+---------+-----+------------+ - */ - void WriteObject(TPODWriter& valueWriter, const TContainer& container) { - const ui32 entriesCount = container.Header.size(); - const ui32 size = entriesCount / 2; - valueWriter.Write(TMeta(container.Type, size)); - - TVector<std::pair<TKeyEntry, TEntry>> keyValuePairs; - keyValuePairs.reserve(size); - for (ui32 i = 0; i < entriesCount; i += 2) { - const auto keyIndex = container.Header[i].Value; - const auto keyOffset = StringOffsets[keyIndex]; - const auto& value = container.Header[i + 1]; - keyValuePairs.emplace_back(TKeyEntry(keyOffset), value); - } - - // We need to sort all elements by key before writing them to buffer. - // All keys are already sorted in Key index so we can just compare - // offsets to them instead of actual keys - SortBy(keyValuePairs, [](const auto& pair) { return pair.first; }); - - TPODWriter keyWriter(valueWriter); - valueWriter.Skip<TKeyEntry>(size); - - TPODWriter entryWriter(valueWriter); - valueWriter.Skip<TEntry>(size); - - for (const auto& pair : keyValuePairs) { - keyWriter.Write(pair.first); - WriteValue(pair.second, entryWriter, valueWriter); - } - } - - void WriteValue(TEntry entry, TPODWriter& entryWriter, TPODWriter& valueWriter) { - TEntry result = entry; - - if (entry.Type == EEntryType::Container) { - const ui32 childIndex = entry.Value; - result.Value = valueWriter.Offset; - WriteContainer(valueWriter, childIndex); - } else if (entry.Type == EEntryType::String) { - const ui32 stringIndex = entry.Value; - result.Value = StringOffsets[stringIndex]; - } else if (entry.Type == EEntryType::Number) { - const ui32 numberIndex = entry.Value; - result.Value = NumberOffsets[numberIndex]; - } - - entryWriter.Write(result); - } - - /** - * @brief Writes String index and returns offsets to all strings - * - * Structure: - * +----------------+----------+-----+--------------+---------+-----+-------------+ - * | Count, 32 bits | SEntry 1 | 
... | SEntry Count | SData 1 | ... | SData Count | - * +----------------+----------+-----+--------------+---------+-----+-------------+ - */ - void WriteStringIndex(TPODWriter& writer) { - const ui32 stringCount = Json.Keys.size() + Json.Strings.size(); - writer.Write(stringCount); - - TPODWriter entryWriter(writer); - writer.Skip<TSEntry>(stringCount); - - // Write SData and SEntry for each string - StringOffsets.resize(stringCount); - - for (const auto& it : Json.Keys) { - const auto& currentString = it.first; - const auto currentIndex = it.second; - - StringOffsets[currentIndex] = entryWriter.Offset; - - // Append SData to the end of the buffer - writer.Write(currentString.data(), currentString.length()); - writer.Write("\0", 1); - - // Rewrite SEntry in string index - entryWriter.Write(TSEntry(EStringType::RawNullTerminated, writer.Offset)); - } - - for (const auto& it : Json.Strings) { - const auto& currentString = it.first; - const auto currentIndex = it.second; - - StringOffsets[currentIndex] = entryWriter.Offset; - - // Append SData to the end of the buffer - writer.Write(currentString.data(), currentString.length()); - writer.Write("\0", 1); - - // Rewrite SEntry in string index - entryWriter.Write(TSEntry(EStringType::RawNullTerminated, writer.Offset)); - } - } - - /** - * @brief Writes Number index and returns offsets to all numbers - * - * Structure: - * +----------+-----+----------+ - * | double 1 | ... | double N | - * +----------+-----+----------+ - */ - void WriteNumberIndex(TPODWriter& writer) { - const ui32 numberCount = Json.Numbers.size(); - - NumberOffsets.resize(numberCount); - for (const auto it : Json.Numbers) { - NumberOffsets[it.second] = writer.Offset; - writer.Write(it.first); - } - } - - TJsonIndex Json; - TBinaryJson Buffer; - TVector<ui32> StringOffsets; - TVector<ui32> NumberOffsets; -}; - -/** - * @brief Callbacks for textual JSON parser. 
Essentially wrapper around TJsonIndex methods - */ -class TBinaryJsonCallbacks : public TJsonCallbacks { -public: - TBinaryJsonCallbacks(bool throwException) - : TJsonCallbacks(/* throwException */ throwException) - { - } - - bool OnNull() override { - Json.AddEntry(TEntry(EEntryType::Null), /* createTopLevel */ true); - return true; - } - - bool OnBoolean(bool value) override { - auto type = EEntryType::BoolFalse; - if (value) { - type = EEntryType::BoolTrue; - } - Json.AddEntry(TEntry(type), /* createTopLevel */ true); - return true; - } - - bool OnInteger(long long value) override { - Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(static_cast<double>(value))), /* createTopLevel */ true); - return true; - } - - bool OnUInteger(unsigned long long value) override { - Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(static_cast<double>(value))), /* createTopLevel */ true); - return true; - } - - bool OnDouble(double value) override { - if (Y_UNLIKELY(std::isinf(value))) { - ythrow yexception() << "JSON number is infinite"; - } - Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(value)), /* createTopLevel */ true); - return true; - } - - bool OnString(const TStringBuf& value) override { - Json.AddEntry(TEntry(EEntryType::String, Json.InternString(value)), /* createTopLevel */ true); - return true; - } - - bool OnOpenMap() override { - Json.AddContainer(EContainerType::Object); - return true; - } - - bool OnMapKey(const TStringBuf& value) override { - Json.AddEntry(TEntry(EEntryType::String, Json.InternKey(value))); - return true; - } - - bool OnCloseMap() override { - Json.RemoveContainer(); - return true; - } - - bool OnOpenArray() override { - Json.AddContainer(EContainerType::Array); - return true; - } - - bool OnCloseArray() override { - Json.RemoveContainer(); - return true; - } - - TJsonIndex GetResult() && { - return std::move(Json); - } - -private: - TJsonIndex Json; -}; - -void DomToJsonIndex(const NUdf::TUnboxedValue& value, 
TBinaryJsonCallbacks& callbacks) { - switch (GetNodeType(value)) { - case ENodeType::String: - callbacks.OnString(value.AsStringRef()); - break; - case ENodeType::Bool: - callbacks.OnBoolean(value.Get<bool>()); - break; - case ENodeType::Int64: - callbacks.OnInteger(value.Get<i64>()); - break; - case ENodeType::Uint64: - callbacks.OnUInteger(value.Get<ui64>()); - break; - case ENodeType::Double: - callbacks.OnDouble(value.Get<double>()); - break; - case ENodeType::Entity: - callbacks.OnNull(); - break; - case ENodeType::List: { - callbacks.OnOpenArray(); - - if (value.IsBoxed()) { - const auto it = value.GetListIterator(); - TUnboxedValue current; - while (it.Next(current)) { - DomToJsonIndex(current, callbacks); - } - } - - callbacks.OnCloseArray(); - break; - } - case ENodeType::Dict: - case ENodeType::Attr: { - callbacks.OnOpenMap(); - - if (value.IsBoxed()) { - const auto it = value.GetDictIterator(); - TUnboxedValue key; - TUnboxedValue value; - while (it.NextPair(key, value)) { - callbacks.OnMapKey(key.AsStringRef()); - DomToJsonIndex(value, callbacks); - } - } - - callbacks.OnCloseMap(); - break; - } - } -} - -} - -TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) { - TMemoryInput input(json.data(), json.size()); - TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (!ReadJson(&input, &callbacks)) { - return Nothing(); - } - - TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); - return std::move(serializer).Serialize(); -} - -TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value) { - TBinaryJsonCallbacks callbacks(/* throwException */ false); - DomToJsonIndex(value, callbacks); - TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); - return std::move(serializer).Serialize(); -} - -}
\ No newline at end of file + +#include <util/generic/vector.h> +#include <util/generic/stack.h> +#include <util/generic/set.h> +#include <util/generic/algorithm.h> +#include <util/generic/map.h> + +#include <cmath> + +namespace NKikimr::NBinaryJson { + +/** + * Serialization is done in 2 steps: + * 1. Parse textual JSON into TJsonIndex + * 2. Serialze TJsonIndex into TBinaryJson + * + * During the first step we: + * 1. Intern all strings found in JSON (both keys of objects and string values) + * 2. Intern all numbers found in JSON + * 3. Record JSON structure using sequence of TContainer. During this steps we store + * indices instead of offsets inside TEntry + * + * During the second step we: + * 1. Write Header + * 2. Write all interned strings into String index + * 3. Write all interned numbers into Number index + * 4. Serialize sequence of TContainer into Tree section. During this step we also + * replace all indices inside TEntry with actual offsets + */ + +using namespace NJson; +using namespace NYql::NDom; + +namespace { + +struct TContainer { + TContainer(EContainerType type) + : Type(type) + { + } + + EContainerType Type; + TVector<TEntry> Header; +}; + +/** + * @brief Intermediate representation of textual JSON convenient for serialization into BinaryJson. Each string and number + * is assigned to unique index and stored in THashMap. JSON structure is stored as a sequence of TContainer + * + * Let us consider and example of how we store JSON structure. Given JSON: + * ``` + * { + * "array": [1, 2] + * } + * ``` + * + * It will be stored as following sequence of TContainer: + * ``` + * TContainer (Object, size 2) + * TEntry (String, "array") + * TEntry (Container, index 1) + * TContainer (Array, size 2) + * TEntry (Number, 1) + * TEntry (Number, 2) + * ``` + * + * Note that we store containers in a flat manner. Array is not nested inside object container, it is referenced by + * container index instead. 
This is exactly how containers are stored in serialized BinaryJson (but with offsets instead of indices) + */ +struct TJsonIndex { + ui32 InternKey(const TStringBuf value) { + TotalKeysCount++; + + const auto it = Keys.find(value); + if (it == Keys.end()) { + const ui32 currentIndex = LastFreeStringIndex++; + Keys[TString(value)] = currentIndex; + TotalKeyLength += value.length() + 1; + return currentIndex; + } else { + return it->second; + } + } + + ui32 InternString(const TStringBuf value) { + const auto it = Strings.find(value); + if (it == Strings.end()) { + const ui32 currentIndex = LastFreeStringIndex++; + Strings[value] = currentIndex; + TotalStringLength += value.length() + 1; + return currentIndex; + } else { + return it->second; + } + } + + ui32 InternNumber(double value) { + const auto it = Numbers.find(value); + if (it == Numbers.end()) { + const ui32 currentIndex = LastFreeNumberIndex++; + Numbers[value] = currentIndex; + return currentIndex; + } else { + return it->second; + } + } + + void AddContainer(EContainerType type) { + Containers.emplace_back(type); + const ui32 index = Containers.size() - 1; + if (!ContainerIndex.empty()) { + // Add new container to parent container + AddEntry(TEntry(EEntryType::Container, index)); + } + ContainerIndex.push(index); + } + + void RemoveContainer() { + ContainerIndex.pop(); + } + + void AddEntry(TEntry entry, bool createTopLevel = false) { + if (createTopLevel && ContainerIndex.empty()) { + AddContainer(EContainerType::TopLevelScalar); + } + Containers[ContainerIndex.top()].Header.push_back(entry); + TotalEntriesCount++; + } + + TStack<ui32> ContainerIndex; + TVector<TContainer> Containers; + + TMap<TString, ui32> Keys; + ui32 TotalKeyLength = 0; + ui32 TotalKeysCount = 0; + + THashMap<TString, ui32> Strings; + ui32 LastFreeStringIndex = 0; + ui32 TotalStringLength = 0; + + THashMap<double, ui32> Numbers; + ui32 LastFreeNumberIndex = 0; + + ui32 TotalEntriesCount = 0; +}; + +/** + * @brief Convenient interface 
to write POD datastructures into buffer + */ +struct TPODWriter { + TBinaryJson& Buffer; + ui32 Offset; + + TPODWriter(TBinaryJson& buffer, ui32 offset) + : Buffer(buffer) + , Offset(offset) + { + } + + template <typename T> + void Write(const T& value) { + Y_VERIFY_DEBUG(Offset + sizeof(T) <= Buffer.Size()); + MemCopy(Buffer.Data() + Offset, reinterpret_cast<const char*>(&value), sizeof(T)); + Offset += sizeof(T); + } + + void Write(const char* source, ui32 length) { + Y_VERIFY_DEBUG(Offset + length <= Buffer.Size()); + MemCopy(Buffer.Data() + Offset, source, length); + Offset += length; + } + + template <typename T> + void Skip(ui32 count) { + Y_VERIFY_DEBUG(Offset + count * sizeof(T) <= Buffer.Size()); + Offset += count * sizeof(T); + } +}; + +/** + * @brief Serializes TJsonIndex into BinaryJson buffer + */ +class TBinaryJsonSerializer { +public: + TBinaryJsonSerializer(TJsonIndex&& json) + : Json(std::move(json)) + { + } + + /** + * @brief Performs actual serialization + * + * BinaryJson structure: + * +--------+------+--------------+--------------+ + * | Header | Tree | String index | Number index | + * +--------+------+--------------+--------------+ + * + * Serialization consists of the following steps: + * 1. Reserve memory for the whole BinaryJson in 1 allocation + * 2. Write Header + * 3. Write String index and record offsets to all strings + * 4. Write Number index and record offsets to all numbers + * 5. Write Tree and replace all indices to strings and numbers with actual offsets + */ + TBinaryJson Serialize() && { + // Header consists only of THeader + const ui32 headerSize = sizeof(THeader); + // Each container consists of 1 TMeta and multiple TEntry. 
Objects also have multiple TKeyEntry + const ui32 keysSize = Json.TotalKeysCount * sizeof(TKeyEntry); + const ui32 entriesSize = (Json.TotalEntriesCount - Json.TotalKeysCount) * sizeof(TEntry); + const ui32 treeSize = Json.Containers.size() * sizeof(TMeta) + entriesSize + keysSize; + + // String index consists of Count and TSEntry/string body pair for each string + const ui32 stringIndexSize = sizeof(ui32) + (Json.Strings.size() + Json.Keys.size()) * sizeof(TSEntry) + (Json.TotalStringLength + Json.TotalKeyLength); + // Number index consists of multiple doubles + const ui32 numberIndexSize = Json.Numbers.size() * sizeof(double); + + // Allocate space for all sections + const ui32 totalSize = headerSize + treeSize + stringIndexSize + numberIndexSize; + Buffer.Advance(totalSize); + + TPODWriter writer(Buffer, 0); + + // Write Header + const ui32 stringIndexStart = headerSize + treeSize; + writer.Write(THeader(CURRENT_VERSION, stringIndexStart)); + + // To get offsets to index elements we first need to write String index and Number index. 
+ // We save current position for later use and skip Tree for now + TPODWriter treeWriter(writer); + writer.Skip<char>(treeSize); + + // Write String index and record offsets to all strings written + WriteStringIndex(writer); + + // Write Number index and record offsets to all numbers written + WriteNumberIndex(writer); + + // Write Tree + WriteContainer(treeWriter, 0); + + return std::move(Buffer); + } + +private: + /** + * @brief Writes container and all its children recursively + */ + void WriteContainer(TPODWriter& valueWriter, ui32 index) { + Y_VERIFY_DEBUG(index < Json.Containers.size()); + const auto& container = Json.Containers[index]; + + switch (container.Type) { + case EContainerType::Array: + case EContainerType::TopLevelScalar: + WriteArray(valueWriter, container); + break; + + case EContainerType::Object: + WriteObject(valueWriter, container); + break; + }; + } + + /** + * @brief Writes array and all its children recursively + * + * Structure: + * +------+---------+-----+------------+ + * | Meta | Entry 1 | ... | Entry Size | + * +------+---------+-----+------------+ + */ + void WriteArray(TPODWriter& valueWriter, const TContainer& container) { + const ui32 size = container.Header.size(); + valueWriter.Write(TMeta(container.Type, size)); + + TPODWriter entryWriter(valueWriter); + valueWriter.Skip<TEntry>(size); + + for (const auto entry : container.Header) { + WriteValue(entry, entryWriter, valueWriter); + } + } + + /** + * @brief Writes object and all its children recursively + * + * Structure: + * +------+------------+-----+---------------+---------+-----+------------+ + * | Meta | KeyEntry 1 | ... | KeyEntry Size | Entry 1 | ... 
| Entry Size | + * +------+------------+-----+---------------+---------+-----+------------+ + */ + void WriteObject(TPODWriter& valueWriter, const TContainer& container) { + const ui32 entriesCount = container.Header.size(); + const ui32 size = entriesCount / 2; + valueWriter.Write(TMeta(container.Type, size)); + + TVector<std::pair<TKeyEntry, TEntry>> keyValuePairs; + keyValuePairs.reserve(size); + for (ui32 i = 0; i < entriesCount; i += 2) { + const auto keyIndex = container.Header[i].Value; + const auto keyOffset = StringOffsets[keyIndex]; + const auto& value = container.Header[i + 1]; + keyValuePairs.emplace_back(TKeyEntry(keyOffset), value); + } + + // We need to sort all elements by key before writing them to buffer. + // All keys are already sorted in Key index so we can just compare + // offsets to them instead of actual keys + SortBy(keyValuePairs, [](const auto& pair) { return pair.first; }); + + TPODWriter keyWriter(valueWriter); + valueWriter.Skip<TKeyEntry>(size); + + TPODWriter entryWriter(valueWriter); + valueWriter.Skip<TEntry>(size); + + for (const auto& pair : keyValuePairs) { + keyWriter.Write(pair.first); + WriteValue(pair.second, entryWriter, valueWriter); + } + } + + void WriteValue(TEntry entry, TPODWriter& entryWriter, TPODWriter& valueWriter) { + TEntry result = entry; + + if (entry.Type == EEntryType::Container) { + const ui32 childIndex = entry.Value; + result.Value = valueWriter.Offset; + WriteContainer(valueWriter, childIndex); + } else if (entry.Type == EEntryType::String) { + const ui32 stringIndex = entry.Value; + result.Value = StringOffsets[stringIndex]; + } else if (entry.Type == EEntryType::Number) { + const ui32 numberIndex = entry.Value; + result.Value = NumberOffsets[numberIndex]; + } + + entryWriter.Write(result); + } + + /** + * @brief Writes String index and returns offsets to all strings + * + * Structure: + * +----------------+----------+-----+--------------+---------+-----+-------------+ + * | Count, 32 bits | SEntry 1 | 
... | SEntry Count | SData 1 | ... | SData Count | + * +----------------+----------+-----+--------------+---------+-----+-------------+ + */ + void WriteStringIndex(TPODWriter& writer) { + const ui32 stringCount = Json.Keys.size() + Json.Strings.size(); + writer.Write(stringCount); + + TPODWriter entryWriter(writer); + writer.Skip<TSEntry>(stringCount); + + // Write SData and SEntry for each string + StringOffsets.resize(stringCount); + + for (const auto& it : Json.Keys) { + const auto& currentString = it.first; + const auto currentIndex = it.second; + + StringOffsets[currentIndex] = entryWriter.Offset; + + // Append SData to the end of the buffer + writer.Write(currentString.data(), currentString.length()); + writer.Write("\0", 1); + + // Rewrite SEntry in string index + entryWriter.Write(TSEntry(EStringType::RawNullTerminated, writer.Offset)); + } + + for (const auto& it : Json.Strings) { + const auto& currentString = it.first; + const auto currentIndex = it.second; + + StringOffsets[currentIndex] = entryWriter.Offset; + + // Append SData to the end of the buffer + writer.Write(currentString.data(), currentString.length()); + writer.Write("\0", 1); + + // Rewrite SEntry in string index + entryWriter.Write(TSEntry(EStringType::RawNullTerminated, writer.Offset)); + } + } + + /** + * @brief Writes Number index and returns offsets to all numbers + * + * Structure: + * +----------+-----+----------+ + * | double 1 | ... | double N | + * +----------+-----+----------+ + */ + void WriteNumberIndex(TPODWriter& writer) { + const ui32 numberCount = Json.Numbers.size(); + + NumberOffsets.resize(numberCount); + for (const auto it : Json.Numbers) { + NumberOffsets[it.second] = writer.Offset; + writer.Write(it.first); + } + } + + TJsonIndex Json; + TBinaryJson Buffer; + TVector<ui32> StringOffsets; + TVector<ui32> NumberOffsets; +}; + +/** + * @brief Callbacks for textual JSON parser. 
Essentially wrapper around TJsonIndex methods + */ +class TBinaryJsonCallbacks : public TJsonCallbacks { +public: + TBinaryJsonCallbacks(bool throwException) + : TJsonCallbacks(/* throwException */ throwException) + { + } + + bool OnNull() override { + Json.AddEntry(TEntry(EEntryType::Null), /* createTopLevel */ true); + return true; + } + + bool OnBoolean(bool value) override { + auto type = EEntryType::BoolFalse; + if (value) { + type = EEntryType::BoolTrue; + } + Json.AddEntry(TEntry(type), /* createTopLevel */ true); + return true; + } + + bool OnInteger(long long value) override { + Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(static_cast<double>(value))), /* createTopLevel */ true); + return true; + } + + bool OnUInteger(unsigned long long value) override { + Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(static_cast<double>(value))), /* createTopLevel */ true); + return true; + } + + bool OnDouble(double value) override { + if (Y_UNLIKELY(std::isinf(value))) { + ythrow yexception() << "JSON number is infinite"; + } + Json.AddEntry(TEntry(EEntryType::Number, Json.InternNumber(value)), /* createTopLevel */ true); + return true; + } + + bool OnString(const TStringBuf& value) override { + Json.AddEntry(TEntry(EEntryType::String, Json.InternString(value)), /* createTopLevel */ true); + return true; + } + + bool OnOpenMap() override { + Json.AddContainer(EContainerType::Object); + return true; + } + + bool OnMapKey(const TStringBuf& value) override { + Json.AddEntry(TEntry(EEntryType::String, Json.InternKey(value))); + return true; + } + + bool OnCloseMap() override { + Json.RemoveContainer(); + return true; + } + + bool OnOpenArray() override { + Json.AddContainer(EContainerType::Array); + return true; + } + + bool OnCloseArray() override { + Json.RemoveContainer(); + return true; + } + + TJsonIndex GetResult() && { + return std::move(Json); + } + +private: + TJsonIndex Json; +}; + +void DomToJsonIndex(const NUdf::TUnboxedValue& value, 
TBinaryJsonCallbacks& callbacks) { + switch (GetNodeType(value)) { + case ENodeType::String: + callbacks.OnString(value.AsStringRef()); + break; + case ENodeType::Bool: + callbacks.OnBoolean(value.Get<bool>()); + break; + case ENodeType::Int64: + callbacks.OnInteger(value.Get<i64>()); + break; + case ENodeType::Uint64: + callbacks.OnUInteger(value.Get<ui64>()); + break; + case ENodeType::Double: + callbacks.OnDouble(value.Get<double>()); + break; + case ENodeType::Entity: + callbacks.OnNull(); + break; + case ENodeType::List: { + callbacks.OnOpenArray(); + + if (value.IsBoxed()) { + const auto it = value.GetListIterator(); + TUnboxedValue current; + while (it.Next(current)) { + DomToJsonIndex(current, callbacks); + } + } + + callbacks.OnCloseArray(); + break; + } + case ENodeType::Dict: + case ENodeType::Attr: { + callbacks.OnOpenMap(); + + if (value.IsBoxed()) { + const auto it = value.GetDictIterator(); + TUnboxedValue key; + TUnboxedValue value; + while (it.NextPair(key, value)) { + callbacks.OnMapKey(key.AsStringRef()); + DomToJsonIndex(value, callbacks); + } + } + + callbacks.OnCloseMap(); + break; + } + } +} + +} + +TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) { + TMemoryInput input(json.data(), json.size()); + TBinaryJsonCallbacks callbacks(/* throwException */ false); + if (!ReadJson(&input, &callbacks)) { + return Nothing(); + } + + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); + return std::move(serializer).Serialize(); +} + +TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value) { + TBinaryJsonCallbacks callbacks(/* throwException */ false); + DomToJsonIndex(value, callbacks); + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); + return std::move(serializer).Serialize(); +} + +}
\ No newline at end of file diff --git a/ydb/library/binary_json/write.h b/ydb/library/binary_json/write.h index f1d4dad7cd..37e0598a14 100644 --- a/ydb/library/binary_json/write.h +++ b/ydb/library/binary_json/write.h @@ -1,22 +1,22 @@ -#pragma once - -#include "format.h" - +#pragma once + +#include "format.h" + #include <ydb/library/yql/minikql/dom/node.h> - -#include <util/generic/maybe.h> - -namespace NKikimr::NBinaryJson { - -/** - * @brief Translates textual JSON into BinaryJson - */ -TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json); - -/** - * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson - */ -TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value); - + +#include <util/generic/maybe.h> + +namespace NKikimr::NBinaryJson { + +/** + * @brief Translates textual JSON into BinaryJson + */ +TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json); + +/** + * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson + */ +TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value); + } diff --git a/ydb/library/binary_json/ya.make b/ydb/library/binary_json/ya.make index c2fe77f4bf..9b1405e86c 100644 --- a/ydb/library/binary_json/ya.make +++ b/ydb/library/binary_json/ya.make @@ -1,29 +1,29 @@ -LIBRARY() - +LIBRARY() + OWNER( laplab g:kikimr ) - + YQL_ABI_VERSION( 2 21 0 ) -PEERDIR( +PEERDIR( library/cpp/json ydb/library/yql/minikql/dom -) - -SRCS( - format.cpp - read.cpp - write.cpp -) - -GENERATE_ENUM_SERIALIZATION(format.h) - +) + +SRCS( + format.cpp + read.cpp + write.cpp +) + +GENERATE_ENUM_SERIALIZATION(format.h) + END() RECURSE_FOR_TESTS( diff --git a/ydb/library/dynumber/dynumber.cpp b/ydb/library/dynumber/dynumber.cpp index af2d50f15a..f34ee0099d 100644 --- a/ydb/library/dynumber/dynumber.cpp +++ b/ydb/library/dynumber/dynumber.cpp @@ -1,240 +1,240 @@ -#include "dynumber.h" - -#include <library/cpp/containers/stack_vector/stack_vec.h> - -#include <util/string/cast.h> 
-#include <util/string/builder.h> -#include <util/stream/buffer.h> - -namespace NKikimr::NDyNumber { - -bool IsValidDyNumber(TStringBuf buffer) { - const auto size = buffer.Size(); - if (!size) - return false; - switch (const auto data = buffer.Data(); *data) { - case '\x00': - if (size < 2U || size > 21U) - return false; - for (auto i = 2U; i < size; ++i) - if ((data[i] & '\x0F') < '\x06' || ((data[i] >> '\x04') & '\x0F') < '\x06') - return false; - break; - case '\x01': - return 1U == size; - case '\x02': - if (size < 2U || size > 21U) - return false; - for (auto i = 2U; i < size; ++i) - if ((data[i] & '\x0F') > '\x09' || ((data[i] >> '\x04') & '\x0F') > '\x09') - return false; - break; - default: - return false; - } - return true; -} - -bool IsValidDyNumberString(TStringBuf str) { - if (str.empty()) - return false; - auto s = str.data(); - auto l = str.size(); - const bool neg = '-' == *s; - if (neg || '+' == *s) { - ++s; - --l; - } - if (!l) - return false; - bool hasDot = false; - auto beforeDot = 0U; - auto nonZeroAfterDot = 0U; - bool hasNonZeroAfterDot = false; - auto zeroAfterDot = 0U; - i16 ePower = 0; - auto tailZeros = 0U; - for (auto i = 0U; i < l; ++i) { - const auto c = s[i]; - const bool isZero = '0' == c; - if (!hasDot && isZero && !beforeDot) - continue; - if (c == '.') { - if (hasDot) - return false; - hasDot = true; - continue; - } - if (c =='e' || c == 'E') { - if (++i >= l) - return false; - if (!TryFromString(s + i, l - i, ePower)) - return false; - break; - } - if (!std::isdigit(c)) - return false; - if (!hasDot) { - ++beforeDot; - } else { - if (!isZero) - hasNonZeroAfterDot = true; - if (hasNonZeroAfterDot) { - if (isZero) { - ++tailZeros; - } else { - nonZeroAfterDot += tailZeros; - tailZeros = 0U; - } - } else { - ++zeroAfterDot; - if (beforeDot) - ++tailZeros; - } - } - } - auto effectivePower = ePower; - if (beforeDot) - effectivePower += beforeDot; - else if (hasNonZeroAfterDot) - effectivePower -= zeroAfterDot; - else - return true; 
- if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) - return false; - if (effectivePower < -129 || effectivePower > 126) - return false; - return true; -} - -TMaybe<TString> ParseDyNumberString(TStringBuf str) { - if (str.empty()) - return Nothing(); - auto s = str.data(); - auto l = str.size(); - const bool neg = '-' == *s; - if (neg || '+' == *s) { - ++s; - --l; - } - if (!l) - return Nothing(); - bool hasDot = false; - auto beforeDot = 0U; - auto nonZeroAfterDot = 0U; - bool hasNonZeroAfterDot = false; - auto zeroAfterDot = 0U; - i16 ePower = 0; - auto tailZeros = 0U; - TSmallVec<char> data; - data.reserve(l); - for (auto i = 0U; i < l; ++i) { - const auto c = s[i]; - const bool isZero = '0' == c; - if (!hasDot && isZero && !beforeDot) - continue; - if (c == '.') { - if (hasDot) - return Nothing(); - hasDot = true; - continue; - } - if (c =='e' || c == 'E') { - if (++i >= l) - return Nothing(); - if (!TryFromString(s + i, l - i, ePower)) - return Nothing(); - break; - } - if (!std::isdigit(c)) - return Nothing(); - if (!hasDot) { - ++beforeDot; - data.emplace_back(c - '0'); - } else { - if (!isZero) - hasNonZeroAfterDot = true; - if (hasNonZeroAfterDot) { - if (isZero) { - ++tailZeros; - } else { - for (; tailZeros; --tailZeros) { - data.emplace_back('\x00'); - ++nonZeroAfterDot; - } - data.emplace_back(c - '0'); - } - } else { - ++zeroAfterDot; - if (beforeDot) - ++tailZeros; - } - } - } - auto effectivePower = ePower; - if (beforeDot) - effectivePower += beforeDot; - else if (hasNonZeroAfterDot) - effectivePower -= zeroAfterDot; - else - return "\x01"; - if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) - return Nothing(); - if (effectivePower < -129 || effectivePower > 126) - return Nothing(); - if (data.size() % 2U) - data.emplace_back('\x00'); - - TString result; - result.reserve(2U + (data.size() >> 1U)); - if (neg) { - result.append('\x00'); - result.append(char(126 - effectivePower)); - for (auto i = 0U; i < data.size(); i += 2U) - 
result.append((('\x0F' - data[i]) << '\x04') | ('\x0F' - data[i + 1])); - } else { - result.append('\x02'); - result.append(char(effectivePower + 129)); - for (auto i = 0U; i < data.size(); i += 2U) - result.append((data[i] << '\x04') | data[i + 1]); - } - return result; -} - -TMaybe<TString> DyNumberToString(TStringBuf buffer) { - TStringBuilder out; - auto s = buffer.data(); - auto l = buffer.size(); - if (l <= 0U || *s >= '\x03') { - return Nothing(); - } - if ('\x01' == *s) { - if (1U != l) { - return Nothing(); - } - out << '0'; - return out; - } - const bool negative = !*s++; - if (negative) - out << '-'; - if (0U >= --l) { - return Nothing(); - } - auto power = ui8(*s++); - if (negative) - power = '\xFF' - power; - out << '.'; - const auto digits = negative ? "FEDCBA9876543210" : "0123456789ABCDEF"; - while (--l) { - const auto c = *s++; - out << digits[(c >> '\x04') & '\x0F']; - if (const auto digit = c & '\x0F'; digit != (negative ? '\x0F' : '\x00') || l > 1U) - out << digits[digit]; - } - if (const auto e = power - 129) - out << 'e' << e; - return out; -} - -}
\ No newline at end of file +#include "dynumber.h" + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/cast.h> +#include <util/string/builder.h> +#include <util/stream/buffer.h> + +namespace NKikimr::NDyNumber { + +bool IsValidDyNumber(TStringBuf buffer) { + const auto size = buffer.Size(); + if (!size) + return false; + switch (const auto data = buffer.Data(); *data) { + case '\x00': + if (size < 2U || size > 21U) + return false; + for (auto i = 2U; i < size; ++i) + if ((data[i] & '\x0F') < '\x06' || ((data[i] >> '\x04') & '\x0F') < '\x06') + return false; + break; + case '\x01': + return 1U == size; + case '\x02': + if (size < 2U || size > 21U) + return false; + for (auto i = 2U; i < size; ++i) + if ((data[i] & '\x0F') > '\x09' || ((data[i] >> '\x04') & '\x0F') > '\x09') + return false; + break; + default: + return false; + } + return true; +} + +bool IsValidDyNumberString(TStringBuf str) { + if (str.empty()) + return false; + auto s = str.data(); + auto l = str.size(); + const bool neg = '-' == *s; + if (neg || '+' == *s) { + ++s; + --l; + } + if (!l) + return false; + bool hasDot = false; + auto beforeDot = 0U; + auto nonZeroAfterDot = 0U; + bool hasNonZeroAfterDot = false; + auto zeroAfterDot = 0U; + i16 ePower = 0; + auto tailZeros = 0U; + for (auto i = 0U; i < l; ++i) { + const auto c = s[i]; + const bool isZero = '0' == c; + if (!hasDot && isZero && !beforeDot) + continue; + if (c == '.') { + if (hasDot) + return false; + hasDot = true; + continue; + } + if (c =='e' || c == 'E') { + if (++i >= l) + return false; + if (!TryFromString(s + i, l - i, ePower)) + return false; + break; + } + if (!std::isdigit(c)) + return false; + if (!hasDot) { + ++beforeDot; + } else { + if (!isZero) + hasNonZeroAfterDot = true; + if (hasNonZeroAfterDot) { + if (isZero) { + ++tailZeros; + } else { + nonZeroAfterDot += tailZeros; + tailZeros = 0U; + } + } else { + ++zeroAfterDot; + if (beforeDot) + ++tailZeros; + } + } + } + auto effectivePower 
= ePower; + if (beforeDot) + effectivePower += beforeDot; + else if (hasNonZeroAfterDot) + effectivePower -= zeroAfterDot; + else + return true; + if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) + return false; + if (effectivePower < -129 || effectivePower > 126) + return false; + return true; +} + +TMaybe<TString> ParseDyNumberString(TStringBuf str) { + if (str.empty()) + return Nothing(); + auto s = str.data(); + auto l = str.size(); + const bool neg = '-' == *s; + if (neg || '+' == *s) { + ++s; + --l; + } + if (!l) + return Nothing(); + bool hasDot = false; + auto beforeDot = 0U; + auto nonZeroAfterDot = 0U; + bool hasNonZeroAfterDot = false; + auto zeroAfterDot = 0U; + i16 ePower = 0; + auto tailZeros = 0U; + TSmallVec<char> data; + data.reserve(l); + for (auto i = 0U; i < l; ++i) { + const auto c = s[i]; + const bool isZero = '0' == c; + if (!hasDot && isZero && !beforeDot) + continue; + if (c == '.') { + if (hasDot) + return Nothing(); + hasDot = true; + continue; + } + if (c =='e' || c == 'E') { + if (++i >= l) + return Nothing(); + if (!TryFromString(s + i, l - i, ePower)) + return Nothing(); + break; + } + if (!std::isdigit(c)) + return Nothing(); + if (!hasDot) { + ++beforeDot; + data.emplace_back(c - '0'); + } else { + if (!isZero) + hasNonZeroAfterDot = true; + if (hasNonZeroAfterDot) { + if (isZero) { + ++tailZeros; + } else { + for (; tailZeros; --tailZeros) { + data.emplace_back('\x00'); + ++nonZeroAfterDot; + } + data.emplace_back(c - '0'); + } + } else { + ++zeroAfterDot; + if (beforeDot) + ++tailZeros; + } + } + } + auto effectivePower = ePower; + if (beforeDot) + effectivePower += beforeDot; + else if (hasNonZeroAfterDot) + effectivePower -= zeroAfterDot; + else + return "\x01"; + if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) + return Nothing(); + if (effectivePower < -129 || effectivePower > 126) + return Nothing(); + if (data.size() % 2U) + data.emplace_back('\x00'); + + TString result; + result.reserve(2U + (data.size() >> 1U)); 
+ if (neg) { + result.append('\x00'); + result.append(char(126 - effectivePower)); + for (auto i = 0U; i < data.size(); i += 2U) + result.append((('\x0F' - data[i]) << '\x04') | ('\x0F' - data[i + 1])); + } else { + result.append('\x02'); + result.append(char(effectivePower + 129)); + for (auto i = 0U; i < data.size(); i += 2U) + result.append((data[i] << '\x04') | data[i + 1]); + } + return result; +} + +TMaybe<TString> DyNumberToString(TStringBuf buffer) { + TStringBuilder out; + auto s = buffer.data(); + auto l = buffer.size(); + if (l <= 0U || *s >= '\x03') { + return Nothing(); + } + if ('\x01' == *s) { + if (1U != l) { + return Nothing(); + } + out << '0'; + return out; + } + const bool negative = !*s++; + if (negative) + out << '-'; + if (0U >= --l) { + return Nothing(); + } + auto power = ui8(*s++); + if (negative) + power = '\xFF' - power; + out << '.'; + const auto digits = negative ? "FEDCBA9876543210" : "0123456789ABCDEF"; + while (--l) { + const auto c = *s++; + out << digits[(c >> '\x04') & '\x0F']; + if (const auto digit = c & '\x0F'; digit != (negative ? '\x0F' : '\x00') || l > 1U) + out << digits[digit]; + } + if (const auto e = power - 129) + out << 'e' << e; + return out; +} + +}
\ No newline at end of file diff --git a/ydb/library/dynumber/dynumber.h b/ydb/library/dynumber/dynumber.h index f64d1a294f..9b0d6d3ab3 100644 --- a/ydb/library/dynumber/dynumber.h +++ b/ydb/library/dynumber/dynumber.h @@ -1,35 +1,35 @@ -#pragma once - -#include <util/generic/buffer.h> -#include <util/generic/maybe.h> - -namespace NKikimr::NDyNumber { - -/** - * DyNumber is a variable-length format to store large numbers. - * Along with binary representation of DyNumber we declare string representation. - * It lacks the original format properties but is human readable and can be passed to - * other storage systems. - */ - -/** - * @brief Checks if buffer stores valid binary representation of DyNumber - */ -bool IsValidDyNumber(TStringBuf buffer); - -/** - * @brief Checks if buffer stores valid string representation of DyNumber - */ -bool IsValidDyNumberString(TStringBuf str); - -/** - * @brief Parses DyNumber string representation into binary one - */ -TMaybe<TString> ParseDyNumberString(TStringBuf str); - -/** - * @brief Converts DyNumber binary representation into string one - */ -TMaybe<TString> DyNumberToString(TStringBuf buffer); - -}
\ No newline at end of file +#pragma once + +#include <util/generic/buffer.h> +#include <util/generic/maybe.h> + +namespace NKikimr::NDyNumber { + +/** + * DyNumber is a variable-length format to store large numbers. + * Along with binary representation of DyNumber we declare string representation. + * It lacks the original format properties but is human readable and can be passed to + * other storage systems. + */ + +/** + * @brief Checks if buffer stores valid binary representation of DyNumber + */ +bool IsValidDyNumber(TStringBuf buffer); + +/** + * @brief Checks if buffer stores valid string representation of DyNumber + */ +bool IsValidDyNumberString(TStringBuf str); + +/** + * @brief Parses DyNumber string representation into binary one + */ +TMaybe<TString> ParseDyNumberString(TStringBuf str); + +/** + * @brief Converts DyNumber binary representation into string one + */ +TMaybe<TString> DyNumberToString(TStringBuf buffer); + +}
\ No newline at end of file diff --git a/ydb/library/dynumber/ut/dynumber_ut.cpp b/ydb/library/dynumber/ut/dynumber_ut.cpp index f46db222de..36f0d722ff 100644 --- a/ydb/library/dynumber/ut/dynumber_ut.cpp +++ b/ydb/library/dynumber/ut/dynumber_ut.cpp @@ -1,31 +1,31 @@ #include <ydb/library/dynumber/dynumber.h> #include <ydb/library/dynumber/cast.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <util/stream/format.h> -#include <util/stream/str.h> - -using namespace NKikimr::NDyNumber; - -namespace { - void TestDyNumber(TStringBuf test) { - UNIT_ASSERT(IsValidDyNumberString(test)); - - const auto dyNumber = ParseDyNumberString(test); - UNIT_ASSERT(dyNumber.Defined()); - UNIT_ASSERT(IsValidDyNumber(*dyNumber)); - - const auto restoredTest = DyNumberToString(*dyNumber); - UNIT_ASSERT(restoredTest.Defined()); - UNIT_ASSERT(IsValidDyNumberString(*restoredTest)); - - const auto dyNumberAfterString = ParseDyNumberString(*restoredTest); - UNIT_ASSERT(dyNumberAfterString.Defined()); - UNIT_ASSERT(IsValidDyNumber(*dyNumberAfterString)); - - UNIT_ASSERT_EQUAL(*dyNumber, *dyNumberAfterString); - } + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/format.h> +#include <util/stream/str.h> + +using namespace NKikimr::NDyNumber; + +namespace { + void TestDyNumber(TStringBuf test) { + UNIT_ASSERT(IsValidDyNumberString(test)); + + const auto dyNumber = ParseDyNumberString(test); + UNIT_ASSERT(dyNumber.Defined()); + UNIT_ASSERT(IsValidDyNumber(*dyNumber)); + + const auto restoredTest = DyNumberToString(*dyNumber); + UNIT_ASSERT(restoredTest.Defined()); + UNIT_ASSERT(IsValidDyNumberString(*restoredTest)); + + const auto dyNumberAfterString = ParseDyNumberString(*restoredTest); + UNIT_ASSERT(dyNumberAfterString.Defined()); + UNIT_ASSERT(IsValidDyNumber(*dyNumberAfterString)); + + UNIT_ASSERT_EQUAL(*dyNumber, *dyNumberAfterString); + } template <typename T> void TestCast(TStringBuf test, TMaybe<T> value) { @@ -45,30 +45,30 @@ namespace { 
UNIT_ASSERT_C(!casted && !value, "Casted: " << casted << ", value: " << value); } } -} - -Y_UNIT_TEST_SUITE(TDyNumberTests) { - Y_UNIT_TEST(ParseAndRestore) { - TestDyNumber("0"); - TestDyNumber(".0"); - TestDyNumber("1"); - TestDyNumber("18"); - TestDyNumber("181"); - TestDyNumber("1817"); - TestDyNumber("-1"); - TestDyNumber("-18"); - TestDyNumber("-181"); - TestDyNumber("-1817"); - TestDyNumber(".023"); - TestDyNumber("0.93"); - TestDyNumber("724.1"); - TestDyNumber("1E-130"); - TestDyNumber("9.9999999999999999999999999999999999999E+125"); - TestDyNumber("9.9999999999999999999999999999999999999000E+125"); - TestDyNumber("-1E-130"); - TestDyNumber("-9.9999999999999999999999999999999999999E+125"); - TestDyNumber("-9.9999999999999999999999999999999999999000E+125"); - } +} + +Y_UNIT_TEST_SUITE(TDyNumberTests) { + Y_UNIT_TEST(ParseAndRestore) { + TestDyNumber("0"); + TestDyNumber(".0"); + TestDyNumber("1"); + TestDyNumber("18"); + TestDyNumber("181"); + TestDyNumber("1817"); + TestDyNumber("-1"); + TestDyNumber("-18"); + TestDyNumber("-181"); + TestDyNumber("-1817"); + TestDyNumber(".023"); + TestDyNumber("0.93"); + TestDyNumber("724.1"); + TestDyNumber("1E-130"); + TestDyNumber("9.9999999999999999999999999999999999999E+125"); + TestDyNumber("9.9999999999999999999999999999999999999000E+125"); + TestDyNumber("-1E-130"); + TestDyNumber("-9.9999999999999999999999999999999999999E+125"); + TestDyNumber("-9.9999999999999999999999999999999999999000E+125"); + } Y_UNIT_TEST(Cast) { TestCast<int>("0", 0); @@ -176,4 +176,4 @@ Y_UNIT_TEST_SUITE(TDyNumberTests) { TestCast<double>("1E-130", 1E-130); TestCast<double>("-1E-130", -1E-130); } -} +} diff --git a/ydb/library/dynumber/ut/ya.make b/ydb/library/dynumber/ut/ya.make index 2f82007ccd..8c184bd4c1 100644 --- a/ydb/library/dynumber/ut/ya.make +++ b/ydb/library/dynumber/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST_FOR(ydb/library/dynumber) - -OWNER(g:kikimr) - -SRCS( - dynumber_ut.cpp -) - -PEERDIR( + +OWNER(g:kikimr) + +SRCS( + 
dynumber_ut.cpp +) + +PEERDIR( ydb/library/dynumber -) - -END() +) + +END() diff --git a/ydb/library/dynumber/ya.make b/ydb/library/dynumber/ya.make index e125e312a7..f79fd60228 100644 --- a/ydb/library/dynumber/ya.make +++ b/ydb/library/dynumber/ya.make @@ -1,16 +1,16 @@ -LIBRARY() - -OWNER(g:kikimr) - -PEERDIR( - library/cpp/containers/stack_vector -) - -SRCS( +LIBRARY() + +OWNER(g:kikimr) + +PEERDIR( + library/cpp/containers/stack_vector +) + +SRCS( cast.h - dynumber.cpp -) - + dynumber.cpp +) + END() RECURSE_FOR_TESTS( diff --git a/ydb/library/mkql_proto/mkql_proto.cpp b/ydb/library/mkql_proto/mkql_proto.cpp index 53d7722dee..0c7b577ad7 100644 --- a/ydb/library/mkql_proto/mkql_proto.cpp +++ b/ydb/library/mkql_proto/mkql_proto.cpp @@ -89,18 +89,18 @@ Y_FORCE_INLINE void HandleKindDataExport(const TType* type, const NUdf::TUnboxed UuidToYdbProto(stringRef.Data(), stringRef.Size(), res); break; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - NUdf::TUnboxedValue json = ValueToString(NUdf::EDataSlot::JsonDocument, value); - const auto stringRef = json.AsStringRef(); - res.set_text_value(stringRef.Data(), stringRef.Size()); - break; - } - case NUdf::TDataType<NUdf::TDyNumber>::Id: { - NUdf::TUnboxedValue number = ValueToString(NUdf::EDataSlot::DyNumber, value); - const auto stringRef = number.AsStringRef(); - res.set_text_value(stringRef.Data(), stringRef.Size()); - break; - } + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + NUdf::TUnboxedValue json = ValueToString(NUdf::EDataSlot::JsonDocument, value); + const auto stringRef = json.AsStringRef(); + res.set_text_value(stringRef.Data(), stringRef.Size()); + break; + } + case NUdf::TDataType<NUdf::TDyNumber>::Id: { + NUdf::TUnboxedValue number = ValueToString(NUdf::EDataSlot::DyNumber, value); + const auto stringRef = number.AsStringRef(); + res.set_text_value(stringRef.Data(), stringRef.Size()); + break; + } default: const auto& stringRef = value.AsStringRef(); res.set_bytes_value(stringRef.Data(), 
stringRef.Size()); @@ -406,16 +406,16 @@ Y_FORCE_INLINE void HandleKindDataExport(const TType* type, const NUdf::TUnboxed UuidToMkqlProto(stringRef.Data(), stringRef.Size(), res); break; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - auto stringRef = value.AsStringRef(); - res.SetBytes(stringRef.Data(), stringRef.Size()); - break; - } - case NUdf::TDataType<NUdf::TDyNumber>::Id: { - auto stringRef = value.AsStringRef(); - res.SetBytes(stringRef.Data(), stringRef.Size()); - break; - } + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + auto stringRef = value.AsStringRef(); + res.SetBytes(stringRef.Data(), stringRef.Size()); + break; + } + case NUdf::TDataType<NUdf::TDyNumber>::Id: { + auto stringRef = value.AsStringRef(); + res.SetBytes(stringRef.Data(), stringRef.Size()); + break; + } default: auto stringRef = value.AsStringRef(); res.SetBytes(stringRef.Data(), stringRef.Size()); @@ -686,14 +686,14 @@ Y_FORCE_INLINE NUdf::TUnboxedValue HandleKindDataImport(const TType* type, const case NUdf::TDataType<NUdf::TInterval>::Id: MKQL_ENSURE_S(oneOfCase == NKikimrMiniKQL::TValue::ValueValueCase::kInt64); return NUdf::TUnboxedValuePod(value.GetInt64()); - case NUdf::TDataType<NUdf::TJsonDocument>::Id: + case NUdf::TDataType<NUdf::TJsonDocument>::Id: MKQL_ENSURE_S(oneOfCase == NKikimrMiniKQL::TValue::ValueValueCase::kBytes); - return MakeString(value.GetBytes()); + return MakeString(value.GetBytes()); case NUdf::TDataType<NUdf::TDecimal>::Id: return NUdf::TUnboxedValuePod(NYql::NDecimal::FromHalfs(value.GetLow128(), value.GetHi128())); - case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: MKQL_ENSURE_S(oneOfCase == NKikimrMiniKQL::TValue::ValueValueCase::kBytes); - return MakeString(value.GetBytes()); + return MakeString(value.GetBytes()); case NUdf::TDataType<NUdf::TUuid>::Id: { MKQL_ENSURE_S(oneOfCase == NKikimrMiniKQL::TValue::ValueValueCase::kLow128); union { @@ -738,8 +738,8 @@ void ExportPrimitiveTypeToProto(ui32 
schemeType, Ydb::Type& output) { case NYql::NProto::TypeIds::Yson: case NYql::NProto::TypeIds::Json: case NYql::NProto::TypeIds::Uuid: - case NYql::NProto::TypeIds::JsonDocument: - case NYql::NProto::TypeIds::DyNumber: + case NYql::NProto::TypeIds::JsonDocument: + case NYql::NProto::TypeIds::DyNumber: output.set_type_id(static_cast<Ydb::Type::PrimitiveTypeId>(schemeType)); break; @@ -1046,8 +1046,8 @@ TNode* TProtoImporter::ImportNodeFromProto(TType* type, const NKikimrMiniKQL::TV break; case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TJsonDocument>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TJsonDocument>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: dataNode = TDataLiteral::Create(env.NewStringValue(value.GetBytes()), dataType, env); break; case NUdf::TDataType<NUdf::TJson>::Id: diff --git a/ydb/library/yql/ast/yql_type_string.cpp b/ydb/library/yql/ast/yql_type_string.cpp index af9af93259..8d7ecd8701 100644 --- a/ydb/library/yql/ast/yql_type_string.cpp +++ b/ydb/library/yql/ast/yql_type_string.cpp @@ -77,8 +77,8 @@ enum EToken TOKEN_EMPTYLIST = -45, TOKEN_EMPTYDICT = -46, TOKEN_TYPE_MAX = -47, - TOKEN_JSON_DOCUMENT = -48, - TOKEN_DYNUMBER = -49, + TOKEN_JSON_DOCUMENT = -48, + TOKEN_DYNUMBER = -49, // identifiers TOKEN_IDENTIFIER = -100, @@ -209,8 +209,8 @@ private: case TOKEN_TZDATETIME: case TOKEN_TZTIMESTAMP: case TOKEN_UUID: - case TOKEN_JSON_DOCUMENT: - case TOKEN_DYNUMBER: + case TOKEN_JSON_DOCUMENT: + case TOKEN_DYNUMBER: type = MakeDataType(Identifier); GetNextToken(); break; diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index a32a6c3dab..c45eaa7909 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -3018,15 +3018,15 @@ TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap, return {}; } 
-TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const THashSet<TStringBuf>& serializeUdfNames) { +TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const THashSet<TStringBuf>& serializeUdfNames) { auto apply = TExprBase(node).Cast<TCoApply>(); auto outerUdf = apply.Arg(0).Maybe<TCoUdf>(); - if (!outerUdf || outerUdf.Cast().MethodName() != parseUdfName) { + if (!outerUdf || outerUdf.Cast().MethodName() != parseUdfName) { return node; } - auto directCase = [&](const TCoApply& apply) { + auto directCase = [&](const TCoApply& apply) { auto node = apply.Ptr(); auto maybeUdfApply = apply.Arg(1).Maybe<TCoApply>(); if (!maybeUdfApply) { @@ -3034,7 +3034,7 @@ TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStri } auto maybePairUdf = maybeUdfApply.Cast().Arg(0).Maybe<TCoUdf>(); - if (!maybePairUdf || !serializeUdfNames.contains(maybePairUdf.Cast().MethodName())) { + if (!maybePairUdf || !serializeUdfNames.contains(maybePairUdf.Cast().MethodName())) { return node; } @@ -3042,12 +3042,12 @@ TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStri return maybeUdfApply.Cast().Arg(1).Ptr(); }; - const auto directRes = directCase(apply); - if (directRes.Get() != node.Get()) { - return directRes; - } - - auto flatMapCase = [&](const TCoApply& apply) { + const auto directRes = directCase(apply); + if (directRes.Get() != node.Get()) { + return directRes; + } + + auto flatMapCase = [&](const TCoApply& apply) { auto node = apply.Ptr(); auto maybeFlatMap = apply.Arg(1).Maybe<TCoFlatMapBase>(); if (!maybeFlatMap) { @@ -3062,7 +3062,7 @@ TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStri } auto maybePairUdf = maybeUdfApply.Cast().Arg(0).Maybe<TCoUdf>(); - if (!maybePairUdf || !serializeUdfNames.contains(maybePairUdf.Cast().MethodName())) { + if (!maybePairUdf || 
!serializeUdfNames.contains(maybePairUdf.Cast().MethodName())) { return node; } @@ -3076,26 +3076,26 @@ TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStri return flatMapCase(apply); } -TExprNode::TPtr FoldYsonParseAfterSerialize(const TExprNode::TPtr& node) { - static const THashSet<TStringBuf> serializeUdfNames = {"Yson.Serialize", "Yson.SerializeText", "Yson.SerializePretty"}; - return FoldParseAfterSerialize(node, "Yson.Parse", serializeUdfNames); -} +TExprNode::TPtr FoldYsonParseAfterSerialize(const TExprNode::TPtr& node) { + static const THashSet<TStringBuf> serializeUdfNames = {"Yson.Serialize", "Yson.SerializeText", "Yson.SerializePretty"}; + return FoldParseAfterSerialize(node, "Yson.Parse", serializeUdfNames); +} TExprNode::TPtr FoldYson2ParseAfterSerialize(const TExprNode::TPtr& node) { static const THashSet<TStringBuf> serializeUdfNames = {"Yson2.Serialize", "Yson2.SerializeText", "Yson2.SerializePretty"}; return FoldParseAfterSerialize(node, "Yson2.Parse", serializeUdfNames); } -TExprNode::TPtr FoldJsonParseAfterSerialize(const TExprNode::TPtr& node) { - static const THashSet<TStringBuf> serializeUdfNames = {"Json2.Serialize"}; - return FoldParseAfterSerialize(node, "Json2.Parse", serializeUdfNames); -} - -TExprNode::TPtr FoldSeralizeAfterParse(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const TStringBuf serializeUdfName) { +TExprNode::TPtr FoldJsonParseAfterSerialize(const TExprNode::TPtr& node) { + static const THashSet<TStringBuf> serializeUdfNames = {"Json2.Serialize"}; + return FoldParseAfterSerialize(node, "Json2.Parse", serializeUdfNames); +} + +TExprNode::TPtr FoldSeralizeAfterParse(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const TStringBuf serializeUdfName) { auto apply = TExprBase(node).Cast<TCoApply>(); auto outerUdf = apply.Arg(0).Maybe<TCoUdf>(); - if (!outerUdf || outerUdf.Cast().MethodName() != serializeUdfName) { + if (!outerUdf || outerUdf.Cast().MethodName() != 
serializeUdfName) { return node; } @@ -3105,7 +3105,7 @@ TExprNode::TPtr FoldSeralizeAfterParse(const TExprNode::TPtr& node, const TStrin } auto maybePairUdf = maybeUdfApply.Cast().Arg(0).Maybe<TCoUdf>(); - if (!maybePairUdf || maybePairUdf.Cast().MethodName().Value() != parseUdfName) { + if (!maybePairUdf || maybePairUdf.Cast().MethodName().Value() != parseUdfName) { return node; } @@ -3118,129 +3118,129 @@ TExprNode::TPtr FoldSeralizeAfterParse(const TExprNode::TPtr& node, const TStrin return innerInput; } -TExprNode::TPtr FoldYsonSeralizeAfterParse(const TExprNode::TPtr& node) { - return FoldSeralizeAfterParse(node, "Yson.Parse", "Yson.Serialize"); -} - +TExprNode::TPtr FoldYsonSeralizeAfterParse(const TExprNode::TPtr& node) { + return FoldSeralizeAfterParse(node, "Yson.Parse", "Yson.Serialize"); +} + TExprNode::TPtr FoldYson2SeralizeAfterParse(const TExprNode::TPtr& node) { return FoldSeralizeAfterParse(node, "Yson2.Parse", "Yson2.Serialize"); } -TExprNode::TPtr FoldJsonSeralizeAfterParse(const TExprNode::TPtr& node) { - return FoldSeralizeAfterParse(node, "Json2.Parse", "Json2.Serialize"); -} - -TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) { - auto jsonPos = jsonExpr->Pos(); - - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TDataExprType>(EDataSlot::Json), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto parse = Build<TCoUdf>(ctx, jsonPos) - .MethodName() - .Build("Json2.Parse") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, jsonPos) - .Callable(parse) - .FreeArgs() - .Add(jsonExpr) - .Build() - .Done().Ptr(); -} - -TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& 
jsonExpr, TExprContext& ctx) { - return BuildJsonParse(jsonExpr.Json().Ptr(), ctx); -} - -TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { - const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn(); - if (type->GetKind() == ETypeAnnotationKind::Optional) { - type = type->Cast<TOptionalExprType>()->GetItemType(); - } - argumentDataSlot = type->Cast<TDataExprType>()->GetSlot(); - - // If jsonExpr has JsonDocument type, there is no need to parse it - if (argumentDataSlot == EDataSlot::JsonDocument) { - return jsonExpr; - } - - // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse - return BuildJsonParse(jsonExpr, ctx); -} - -TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { - return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot); -} - -TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) { - auto resourcePos = resourceExpr->Pos(); - - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto parse = Build<TCoUdf>(ctx, resourcePos) - .MethodName() - .Build("Json2.Serialize") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, resourcePos) - .Callable(parse) - .FreeArgs() - .Add(resourceExpr) - .Build() - .Done().Ptr(); -} - -TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { - auto jsonPathPos = jsonExpr.JsonPath().Pos(); - - auto 
argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TDataExprType>(EDataSlot::Utf8) - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto compilePath = Build<TCoUdf>(ctx, jsonPathPos) - .MethodName() - .Build("Json2.CompilePath") - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, jsonPathPos) - .Callable(compilePath) - .FreeArgs() - .Add(jsonExpr.JsonPath()) - .Build() - .Done().Ptr(); -} - +TExprNode::TPtr FoldJsonSeralizeAfterParse(const TExprNode::TPtr& node) { + return FoldSeralizeAfterParse(node, "Json2.Parse", "Json2.Serialize"); +} + +TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ctx) { + auto jsonPos = jsonExpr->Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TDataExprType>(EDataSlot::Json), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto parse = Build<TCoUdf>(ctx, jsonPos) + .MethodName() + .Build("Json2.Parse") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, jsonPos) + .Callable(parse) + .FreeArgs() + .Add(jsonExpr) + .Build() + .Done().Ptr(); +} + +TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { + return BuildJsonParse(jsonExpr.Json().Ptr(), ctx); +} + +TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { + const 
TTypeAnnotationNode* type = jsonExpr->GetTypeAnn(); + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + argumentDataSlot = type->Cast<TDataExprType>()->GetSlot(); + + // If jsonExpr has JsonDocument type, there is no need to parse it + if (argumentDataSlot == EDataSlot::JsonDocument) { + return jsonExpr; + } + + // Otherwise jsonExpr has Json type and we need to wrap it in Json2::Parse + return BuildJsonParse(jsonExpr, ctx); +} + +TExprNode::TPtr GetJsonDocumentOrParseJson(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { + return GetJsonDocumentOrParseJson(jsonExpr.Json().Ptr(), ctx, argumentDataSlot); +} + +TExprNode::TPtr BuildJsonSerialize(const TExprNode::TPtr& resourceExpr, TExprContext& ctx) { + auto resourcePos = resourceExpr->Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto parse = Build<TCoUdf>(ctx, resourcePos) + .MethodName() + .Build("Json2.Serialize") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, resourcePos) + .Callable(parse) + .FreeArgs() + .Add(resourceExpr) + .Build() + .Done().Ptr(); +} + +TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { + auto jsonPathPos = jsonExpr.JsonPath().Pos(); + + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TDataExprType>(EDataSlot::Utf8) + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, 
+ ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto compilePath = Build<TCoUdf>(ctx, jsonPathPos) + .MethodName() + .Build("Json2.CompilePath") + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonPathPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, jsonPathPos) + .Callable(compilePath) + .FreeArgs() + .Add(jsonExpr.JsonPath()) + .Build() + .Done().Ptr(); +} + template<bool Ordered> TExprNode::TPtr CanonizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx) { YQL_CLOG(DEBUG, Core) << "Canonize " << node->Content() << " of width " << node->Tail().ChildrenSize() - 1U; @@ -4692,16 +4692,16 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ret; } - ret = FoldJsonParseAfterSerialize(node); - if (ret != node) { - return ret; - } - - ret = FoldJsonSeralizeAfterParse(node); - if (ret != node) { - return ret; - } - + ret = FoldJsonParseAfterSerialize(node); + if (ret != node) { + return ret; + } + + ret = FoldJsonSeralizeAfterParse(node); + if (ret != node) { + return ret; + } + return node; }; @@ -5253,665 +5253,665 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return node; }; - - map["JsonValue"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { - /* - Here we rewrite expression - JSON_VALUE( - <json>, <jsonPath> - [PASSING <variableExpr1> AS <variableName1>, ...] - [RETURNING <resultType>] - [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY] - [(NULL | DEFAULT <onErrorExpr>) ON ERROR] - ) - Generated depends on the <resultType> specified in RETURNING section: - 1. No RETURNING section - Default returning type of JsonValue is Utf8 and it must convert - result of JsonPath expression into Utf8 string. - Json2::SqlValueConvertToUtf8 is used - 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.) 
- Json2::SqlValueNumber is used with additional CAST to corresponding type - 3. <resultType> is a date type (Date, Datetime, Timestamp) - Json2::SqlValueInt64 is used with additional CAST to corresponding type - 4. <resultType> is Bool - Json2::SqlValueBool is used - 5. <resultType> is String - Json2::SqlValueUtf8 is used with additional CAST to String - 6. <resultType> is Utf8 - Json2::SqlValueUtf8 is used - Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>: - 1. If variant holds first type, either error happened or the result is empty. - If first tuple element is 0, result is empty. - If first tuple element is 1, error happened. - Second tuple element contains message that can be displayed to the user. - 2. If variant hold second type, execution was successful and it is a result. - We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type. - Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So: - 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>) - 2. Otherwise we check the result of SafeCast callable. If it is NULL, cast has failed and it is an error. - If it holds some value, we return it to the user. - If no CAST is needed, we just return the result of Json2::SqlValue*. - What is more, <onEmptyExpr> and <onErrorExpr> must be casted to <resultType> and this CAST can fail too. - ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to - target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception. - - I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea - of a way to handle all this ***, please write to laplab@. 
- */ - TCoJsonValue jsonValue(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx); - - // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>) - TExprNode::TPtr sqlValueExpr; - const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>(); - const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot(); - bool needCast = false; - const auto jsonValuePos = jsonValue.Pos(); - { - TString sqlValueUdfName; - if (IsDataTypeNumeric(unwrappedSlot)) { - sqlValueUdfName = "SqlValueNumber"; - needCast = true; - } else if (IsDataTypeDate(unwrappedSlot)) { - sqlValueUdfName = "SqlValueInt64"; - needCast = true; - } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) { - if (jsonValue.ReturningType()) { - sqlValueUdfName = "SqlValueUtf8"; - } else { - sqlValueUdfName = "SqlValueConvertToUtf8"; - } - needCast = unwrappedSlot == EDataSlot::String; - } else if (unwrappedSlot == EDataSlot::Bool) { - sqlValueUdfName = "SqlValueBool"; - } else { - YQL_ENSURE(false, "Unsupported type"); - } - - const TTypeAnnotationNode* inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlValueUdfName = "JsonDocument" + sqlValueUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - sqlValueUdfName = "Json2." 
+ sqlValueUdfName; - - TTypeAnnotationNode::TListType arguments = { - inputType, - ctx.MakeType<TResourceExprType>("JsonPath") - }; - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos) - .MethodName() - .Build(sqlValueUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos) - .Callable(sqlValue) - .FreeArgs() - .Add(jsonExpr) - .Add(compilePathExpr) - .Add(jsonValue.Variables()) - .Build() - .Done().Ptr(); - } - - auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) { - /* - if Exists($source) - then - return IfPresent( - CAST($source as <resultType>), - ($x) -> { return Just($x); }, - $onCastFail - ) - else - return Nothing(<resultType>) - */ - TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx); - return Build<TCoIf>(ctx, pos) - .Predicate<TCoExists>() - .Optional(source) - .Build() - .ThenValue<TCoIfPresent>() - .Optional<TCoSafeCast>() - .Value(source) - .Type(returnTypeNode) - .Build() - .PresentHandler<TCoLambda>() - .Args({"unwrappedValue"}) - .Body<TCoJust>() - .Input("unwrappedValue") - .Build() - .Build() - .MissingValue(onCastFail) - .Build() - .ElseValue<TCoNothing>() - .OptionalType(returnTypeNode) - .Build() - .Done().Ptr(); - }; - - auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) { - return Build<TCoEnsure>(ctx, pos) - .Value<TCoNothing>() - .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) - .Build() - .Predicate<TCoBool>() - .Literal() - .Build("false") - .Build() - .Message(message) - .Done().Ptr(); - }; - - auto makeHandler = [&](EJsonValueHandlerMode 
mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr { - const auto pos = node->Pos(); - if (mode == EJsonValueHandlerMode::Error) { - return makeThrow(pos, errorMessage); - } - - // Convert NULL to Nothing(<resultType>) - if (IsNull(*node)) { - return Build<TCoNothing>(ctx, pos) - .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) - .Done().Ptr(); - } - - // If type is not Optional, wrap expression in Just call - TExprNode::TPtr result = node; - const auto typeAnn = node->GetTypeAnn(); - if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) { - result = Build<TCoJust>(ctx, pos) - .Input(result) - .Done().Ptr(); - } - - // Perform CAST to <resultType> or return onCastFail - return makeCastOrValue(pos, result, onCastFail); - }; - - const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content()); - const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content()); - auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) { - const auto onError = jsonValue.OnError(); - const auto throwCastError = makeThrow( - onError.Pos(), - Build<TCoString>(ctx, onError.Pos()) - .Literal() - .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn)) - .Done().Ptr() - ); - - return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError); - }; - auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) { - const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos()) - .Literal() - .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn)) - .Done().Ptr(); - return makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError)); - }; - - /* - Lambda for handling first type of variant - - ($errorTuple) -> { - 
if $errorTuple[0] == 0 - then - return onEmptyHandler - else - return onErrorHandler - } - */ - auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple"); - auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos) - .Tuple(errorTupleArgument) - .Index() - .Build("1") - .Done().Ptr(); - const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args(TExprNode::TListType{errorTupleArgument}) - .Body<TCoIf>() - .Predicate<TCoCmpEqual>() - .Left<TCoNth>() - .Tuple(errorTupleArgument) - .Index() - .Build("0") - .Build() - .Right<TCoUint8>() - .Literal() - .Build("0") - .Build() - .Build() - .ThenValue(makeOnEmptyHandler(sqlValueMessage)) - .ElseValue(makeOnErrorHandler(sqlValueMessage)) - .Build() - .Done().Ptr(); - - // Lambda for handling second type of variant - TExprNode::TPtr sqlValueResultLambda; - if (needCast) { - const auto errorMessage = Build<TCoString>(ctx, jsonValuePos) - .Literal() - .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn)) - .Done().Ptr(); - const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult"); - sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args(TExprNode::TListType{inputArgument}) - .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage))) - .Done().Ptr(); - } else { - /* - ($sqlValueResult) -> { - return $sqlValueResult; - } - */ - sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) - .Args({"sqlValueResult"}) - .Body("sqlValueResult") - .Done().Ptr(); - } - - // Visit call to get the result - const auto visitResult = Build<TCoVisit>(ctx, jsonValuePos) - .Input(sqlValueExpr) - .FreeArgs() - .Add<TCoAtom>() - .Build("0") - .Add(errorLambda) - .Add<TCoAtom>() - .Build("1") - .Add(sqlValueResultLambda) - .Build() - .Done().Ptr(); - - return visitResult; - }; - - map["JsonExists"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { - /* - Here we rewrite expression - 
JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR) - into - Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>) - and its sibling - JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR) - into - Json2::SqlTryExists(Json2::Parse(<json expr>), <dict with variables>, Json2::CompilePath(<jsonpath>)) - */ - TCoJsonExists jsonExists(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx); - - // Json2::SqlExists(<json>, <compiled jsonpath>, [<default value>]) - // or - // Json2::SqlTryExists(<json>, <compiled jsonpath>) - const bool needThrow = !jsonExists.OnError().IsValid(); - - TString sqlExistsUdfName = "SqlExists"; - if (needThrow) { - sqlExistsUdfName = "SqlTryExists"; - } - - const TTypeAnnotationNode* inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - sqlExistsUdfName = "Json2." 
+ sqlExistsUdfName; - - TTypeAnnotationNode::TListType arguments = { - inputType, - ctx.MakeType<TResourceExprType>("JsonPath") - }; - - if (!needThrow) { - const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool); - const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType); - arguments.push_back(optionalBoolType); - } - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - const auto jsonExistsPos = jsonExists.Pos(); - auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos) - .MethodName() - .Build(sqlExistsUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - if (needThrow) { - return Build<TCoApply>(ctx, jsonExistsPos) - .Callable(sqlExists) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonExists.Variables()) - .Build() - .Done().Ptr(); - } - - return Build<TCoApply>(ctx, jsonExistsPos) - .Callable(sqlExists) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonExists.Variables()) - .Add(jsonExists.OnError().Cast()) - .Build() - .Done().Ptr(); - }; - - map["JsonQuery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { - /* - Here we rewrite expression - JSON_QUERY( - <json expr>, - <jsonpath> - [PASSING <variableExpr1> AS <variableName1>, ...] 
- [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER] - [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY] - [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR] - ) - into something like - Json2::SqlQuery...( - Json2::Parse(<json expr>), - Json2::CompilePath(<jsonpath>), - <dict with variables>, - <do we have ERROR ON EMPTY?>, - <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>, - <do we have ERROR ON ERROR?>, - <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR> - ) - Exact UDF name is choosen depending on wrap config: - - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery - - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap - - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap - */ - TCoJsonQuery jsonQuery(node); - - // <json expr> or Json2::Parse(<json expr>) - EDataSlot jsonDataSlot; - TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot); - - // Json2::CompilePath(<jsonPath>) - TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx); - - // Json2::SqlQuery...(<json expr>, <jsonpath>, ...) 
- const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content()); - TString sqlQueryUdfName = "SqlQuery"; - switch (wrapMode) { - case EJsonQueryWrap::NoWrap: - sqlQueryUdfName = "SqlQuery"; - break; - case EJsonQueryWrap::Wrap: - sqlQueryUdfName = "SqlQueryWrap"; - break; - case EJsonQueryWrap::ConditionalWrap: - sqlQueryUdfName = "SqlQueryConditionalWrap"; - break; - } - - const TTypeAnnotationNode* inputType = nullptr; - if (jsonDataSlot == EDataSlot::JsonDocument) { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); - sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName; - } else { - inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - } - inputType = ctx.MakeType<TOptionalExprType>(inputType); - sqlQueryUdfName = "Json2." + sqlQueryUdfName; - - const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); - TTypeAnnotationNode::TListType arguments{ - inputType, - ctx.MakeType<TResourceExprType>("JsonPath"), - ctx.MakeType<TDataExprType>(EDataSlot::Bool), - optionalJsonResourceType, - ctx.MakeType<TDataExprType>(EDataSlot::Bool), - optionalJsonResourceType, - }; - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TTupleExprType>(arguments), - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) { - return Build<TCoBool>(ctx, pos) - .Literal() - .Build(handler == EJsonQueryHandler::Error ? 
"true" : "false") - .Done().Ptr(); - }; - - auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) { - switch (handler) { - case EJsonQueryHandler::Error: - case EJsonQueryHandler::Null: { - // Nothing(Resource<JsonNode>) - return Build<TCoNothing>(ctx, pos) - .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx)) - .Done().Ptr(); - } - case EJsonQueryHandler::EmptyArray: { - auto value = Build<TCoJson>(ctx, pos) - .Literal() - .Build("[]") - .Done().Ptr(); - return BuildJsonParse(value, ctx); - } - case EJsonQueryHandler::EmptyObject: { - auto value = Build<TCoJson>(ctx, pos) - .Literal() - .Build("{}") - .Done().Ptr(); - return BuildJsonParse(value, ctx); - } - } - }; - - const auto jsonQueryPos = jsonQuery.Pos(); - auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos) - .MethodName() - .Build(sqlQueryUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content()); - const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content()); - const auto onEmptyPos = jsonQuery.OnEmpty().Pos(); - const auto onErrorPos = jsonQuery.OnError().Pos(); - - auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos) - .Callable(sqlQuery) - .FreeArgs() - .Add(parseJsonExpr) - .Add(compilePathExpr) - .Add(jsonQuery.Variables()) - .Add(buildShouldThrow(onEmpty, onEmptyPos)) - .Add(buildHandler(onEmpty, onEmptyPos)) - .Add(buildShouldThrow(onError, onErrorPos)) - .Add(buildHandler(onError, onErrorPos)) - .Build() - .Done().Ptr(); - - // In this case we need to serialize Resource<JsonNode> to Json type - if (!optCtx.Types->JsonQueryReturnsJsonDocument) { - return BuildJsonSerialize(sqlQueryApply, ctx); - } - - // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument - { - auto resourcePos = sqlQueryApply->Pos(); - - auto argumentsType = 
ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - TStringBuf serializeUdfName = "Json2.Serialize"; - if (optCtx.Types->JsonQueryReturnsJsonDocument) { - serializeUdfName = "Json2.SerializeToJsonDocument"; - } - auto parse = Build<TCoUdf>(ctx, resourcePos) - .MethodName() - .Build(serializeUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - return Build<TCoApply>(ctx, resourcePos) - .Callable(parse) - .FreeArgs() - .Add(sqlQueryApply) - .Build() - .Done().Ptr(); - } - }; - - map["JsonVariables"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { - /* - Here we rewrite expression - PASSING - <expr1> as <name1>, - <expr2> as <name2>, - ... - Into something like: - AsDict( - '( <-- tuple creation - <name1>, - Json2::...AsJsonNode(<expr1>) <-- exact name depends on the <expr1> type - ), - '( - <name2>, - Json2::...AsJsonNode(<expr2>) - ), - .... - ) - If <expr> is NULL, it is replaced with Nothing(String). - If <expr> is not Optional, it is wrapped in Just call. 
- */ - TCoJsonVariables jsonVariables(node); - const auto pos = jsonVariables.Pos(); - - TVector<TExprNode::TPtr> children; - for (const auto& tuple : jsonVariables) { - TExprNode::TPtr name = tuple.Name().Ptr(); - const auto nameUtf8 = Build<TCoUtf8>(ctx, name->Pos()) - .Literal(name) - .Done().Ptr(); - - TExprNode::TPtr payload = tuple.Value().Cast().Ptr(); - auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - payload->GetTypeAnn(), - }); - - auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ - argumentsType, - ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), - ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) - }); - - EDataSlot payloadSlot; - const auto* payloadType = payload->GetTypeAnn(); - if (payloadType->GetKind() == ETypeAnnotationKind::Null) { - // we treat NULL as Nothing(Utf8?) - payloadSlot = EDataSlot::Utf8; - const auto* optionalUtf8 = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(payloadSlot)); - payload = Build<TCoNothing>(ctx, pos) - .OptionalType(ExpandType(pos, *optionalUtf8, ctx)) - .Done().Ptr(); - } else if (payloadType->GetKind() == ETypeAnnotationKind::Optional) { - payloadSlot = payloadType->Cast<TOptionalExprType>()->GetItemType()->Cast<TDataExprType>()->GetSlot(); - } else { - payloadSlot = payloadType->Cast<TDataExprType>()->GetSlot(); - payload = Build<TCoJust>(ctx, pos) - .Input(payload) - .Done().Ptr(); - } - - TStringBuf convertUdfName; - if (IsDataTypeNumeric(payloadSlot) || IsDataTypeDate(payloadSlot)) { - payload = Build<TCoSafeCast>(ctx, pos) - .Value(payload) - .Type(ExpandType(payload->Pos(), *ctx.MakeType<TDataExprType>(EDataSlot::Double), ctx)) - .Done().Ptr(); - convertUdfName = "Json2.DoubleAsJsonNode"; - } else if (payloadSlot == EDataSlot::Utf8) { - convertUdfName = "Json2.Utf8AsJsonNode"; - } else if (payloadSlot == EDataSlot::Bool) { - convertUdfName = "Json2.BoolAsJsonNode"; - } else if (payloadSlot == 
EDataSlot::Json) { - convertUdfName = "Json2.JsonAsJsonNode"; - } else { - YQL_ENSURE(false, "Unsupported type"); - } - - auto payloadPos = payload->Pos(); - auto convert = Build<TCoUdf>(ctx, payloadPos) - .MethodName() - .Build(convertUdfName) - .RunConfigValue<TCoVoid>() - .Build() - .UserType(ExpandType(payloadPos, *udfArgumentsType, ctx)) - .Done().Ptr(); - - auto applyConvert = Build<TCoApply>(ctx, payloadPos) - .Callable(convert) - .FreeArgs() - .Add(payload) - .Build() - .Done().Ptr(); - - auto pair = ctx.NewList(tuple.Pos(), {nameUtf8, applyConvert}); - children.push_back(pair); - } - - return Build<TCoAsDict>(ctx, pos) - .FreeArgs() - .Add(children) - .Build() - .Done().Ptr(); - }; + + map["JsonValue"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + /* + Here we rewrite expression + JSON_VALUE( + <json>, <jsonPath> + [PASSING <variableExpr1> AS <variableName1>, ...] + [RETURNING <resultType>] + [(NULL | DEFAULT <onEmptyExpr>) ON EMPTY] + [(NULL | DEFAULT <onErrorExpr>) ON ERROR] + ) + Generated depends on the <resultType> specified in RETURNING section: + 1. No RETURNING section + Default returning type of JsonValue is Utf8 and it must convert + result of JsonPath expression into Utf8 string. + Json2::SqlValueConvertToUtf8 is used + 2. <resultType> is a numeric type (Int16, Uint16, Float, etc.) + Json2::SqlValueNumber is used with additional CAST to corresponding type + 3. <resultType> is a date type (Date, Datetime, Timestamp) + Json2::SqlValueInt64 is used with additional CAST to corresponding type + 4. <resultType> is Bool + Json2::SqlValueBool is used + 5. <resultType> is String + Json2::SqlValueUtf8 is used with additional CAST to String + 6. <resultType> is Utf8 + Json2::SqlValueUtf8 is used + Returning type of all Json2::SqlValue* functions is Variant<Tuple<Uint8, String?>, <resultType>?>: + 1. If variant holds first type, either error happened or the result is empty. 
+ If first tuple element is 0, result is empty. + If first tuple element is 1, error happened. + Second tuple element contains message that can be displayed to the user. + 2. If variant hold second type, execution was successful and it is a result. + We process result of Json2::SqlValue* function by using Visit callable with lambdas handling each type. + Note that in some cases we need to CAST result of Json2::SqlValue* and it can fail. So: + 1. If the result of Json2::SqlValue* is NULL, we return Nothing(<resultType>) + 2. Otherwise we check the result of SafeCast callable. If it is NULL, cast has failed and it is an error. + If it holds some value, we return it to the user. + If no CAST is needed, we just return the result of Json2::SqlValue*. + What is more, <onEmptyExpr> and <onErrorExpr> must be casted to <resultType> and this CAST can fail too. + ANSI SQL specification is unclear about what to do with this situation. If we failed to cast <onEmptyExpr> to + target type, we return <onErrorExpr>. If we failed to cast <onErrorExpr> to target type, we throw an exception. + + I know all this sounds very clumsy and a lot of logic to handle in s-expressions. If you have a better idea + of a way to handle all this ***, please write to laplab@. 
+ */ + TCoJsonValue jsonValue(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr jsonExpr = GetJsonDocumentOrParseJson(jsonValue, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonValue, ctx); + + // Json2::SqlValue...(<parsedJson>, <compiledJsonPath>) + TExprNode::TPtr sqlValueExpr; + const auto returnTypeAnn = node->GetTypeAnn()->Cast<TOptionalExprType>(); + const auto unwrappedSlot = returnTypeAnn->GetItemType()->Cast<TDataExprType>()->GetSlot(); + bool needCast = false; + const auto jsonValuePos = jsonValue.Pos(); + { + TString sqlValueUdfName; + if (IsDataTypeNumeric(unwrappedSlot)) { + sqlValueUdfName = "SqlValueNumber"; + needCast = true; + } else if (IsDataTypeDate(unwrappedSlot)) { + sqlValueUdfName = "SqlValueInt64"; + needCast = true; + } else if (unwrappedSlot == EDataSlot::Utf8 || unwrappedSlot == EDataSlot::String) { + if (jsonValue.ReturningType()) { + sqlValueUdfName = "SqlValueUtf8"; + } else { + sqlValueUdfName = "SqlValueConvertToUtf8"; + } + needCast = unwrappedSlot == EDataSlot::String; + } else if (unwrappedSlot == EDataSlot::Bool) { + sqlValueUdfName = "SqlValueBool"; + } else { + YQL_ENSURE(false, "Unsupported type"); + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + sqlValueUdfName = "JsonDocument" + sqlValueUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + sqlValueUdfName = "Json2." 
+ sqlValueUdfName; + + TTypeAnnotationNode::TListType arguments = { + inputType, + ctx.MakeType<TResourceExprType>("JsonPath") + }; + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto sqlValue = Build<TCoUdf>(ctx, jsonValuePos) + .MethodName() + .Build(sqlValueUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonValuePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + sqlValueExpr = Build<TCoApply>(ctx, jsonValuePos) + .Callable(sqlValue) + .FreeArgs() + .Add(jsonExpr) + .Add(compilePathExpr) + .Add(jsonValue.Variables()) + .Build() + .Done().Ptr(); + } + + auto makeCastOrValue = [&](TPositionHandle pos, const TExprNode::TPtr& source, const TExprNode::TPtr& onCastFail) { + /* + if Exists($source) + then + return IfPresent( + CAST($source as <resultType>), + ($x) -> { return Just($x); }, + $onCastFail + ) + else + return Nothing(<resultType>) + */ + TExprNode::TPtr returnTypeNode = ExpandType(pos, *returnTypeAnn, ctx); + return Build<TCoIf>(ctx, pos) + .Predicate<TCoExists>() + .Optional(source) + .Build() + .ThenValue<TCoIfPresent>() + .Optional<TCoSafeCast>() + .Value(source) + .Type(returnTypeNode) + .Build() + .PresentHandler<TCoLambda>() + .Args({"unwrappedValue"}) + .Body<TCoJust>() + .Input("unwrappedValue") + .Build() + .Build() + .MissingValue(onCastFail) + .Build() + .ElseValue<TCoNothing>() + .OptionalType(returnTypeNode) + .Build() + .Done().Ptr(); + }; + + auto makeThrow = [&](TPositionHandle pos, const TExprNode::TPtr& message) { + return Build<TCoEnsure>(ctx, pos) + .Value<TCoNothing>() + .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) + .Build() + .Predicate<TCoBool>() + .Literal() + .Build("false") + .Build() + .Message(message) + .Done().Ptr(); + }; + + auto makeHandler = [&](EJsonValueHandlerMode 
mode, const TExprNode::TPtr& node, const TExprNode::TPtr& errorMessage, const TExprNode::TPtr& onCastFail) -> TExprNode::TPtr { + const auto pos = node->Pos(); + if (mode == EJsonValueHandlerMode::Error) { + return makeThrow(pos, errorMessage); + } + + // Convert NULL to Nothing(<resultType>) + if (IsNull(*node)) { + return Build<TCoNothing>(ctx, pos) + .OptionalType(ExpandType(pos, *returnTypeAnn, ctx)) + .Done().Ptr(); + } + + // If type is not Optional, wrap expression in Just call + TExprNode::TPtr result = node; + const auto typeAnn = node->GetTypeAnn(); + if (typeAnn->GetKind() != ETypeAnnotationKind::Optional) { + result = Build<TCoJust>(ctx, pos) + .Input(result) + .Done().Ptr(); + } + + // Perform CAST to <resultType> or return onCastFail + return makeCastOrValue(pos, result, onCastFail); + }; + + const auto onEmptyMode = FromString<EJsonValueHandlerMode>(jsonValue.OnEmptyMode().Ref().Content()); + const auto onErrorMode = FromString<EJsonValueHandlerMode>(jsonValue.OnErrorMode().Ref().Content()); + auto makeOnErrorHandler = [&](const TExprNode::TPtr& errorMessage) { + const auto onError = jsonValue.OnError(); + const auto throwCastError = makeThrow( + onError.Pos(), + Build<TCoString>(ctx, onError.Pos()) + .Literal() + .Build(TStringBuilder() << "Failed to cast default value from ON ERROR clause to target type " << FormatType(returnTypeAnn)) + .Done().Ptr() + ); + + return makeHandler(onErrorMode, onError.Ptr(), errorMessage, throwCastError); + }; + auto makeOnEmptyHandler = [&](const TExprNode::TPtr& errorMessage) { + const auto onEmptyDefaultCastError = Build<TCoString>(ctx, jsonValue.OnEmpty().Pos()) + .Literal() + .Build(TStringBuilder() << "Failed to cast default value from ON EMPTY clause to target type " << FormatType(returnTypeAnn)) + .Done().Ptr(); + return makeHandler(onEmptyMode, jsonValue.OnEmpty().Ptr(), errorMessage, makeOnErrorHandler(onEmptyDefaultCastError)); + }; + + /* + Lambda for handling first type of variant + + ($errorTuple) -> { + 
if $errorTuple[0] == 0 + then + return onEmptyHandler + else + return onErrorHandler + } + */ + auto errorTupleArgument = ctx.NewArgument(jsonValuePos, "errorTuple"); + auto sqlValueMessage = Build<TCoNth>(ctx, jsonValuePos) + .Tuple(errorTupleArgument) + .Index() + .Build("1") + .Done().Ptr(); + const auto errorLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args(TExprNode::TListType{errorTupleArgument}) + .Body<TCoIf>() + .Predicate<TCoCmpEqual>() + .Left<TCoNth>() + .Tuple(errorTupleArgument) + .Index() + .Build("0") + .Build() + .Right<TCoUint8>() + .Literal() + .Build("0") + .Build() + .Build() + .ThenValue(makeOnEmptyHandler(sqlValueMessage)) + .ElseValue(makeOnErrorHandler(sqlValueMessage)) + .Build() + .Done().Ptr(); + + // Lambda for handling second type of variant + TExprNode::TPtr sqlValueResultLambda; + if (needCast) { + const auto errorMessage = Build<TCoString>(ctx, jsonValuePos) + .Literal() + .Build(TStringBuilder() << "Failed to cast extracted JSON value to target type " << FormatType(returnTypeAnn)) + .Done().Ptr(); + const auto inputArgument = ctx.NewArgument(jsonValuePos, "sqlValueResult"); + sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args(TExprNode::TListType{inputArgument}) + .Body(makeCastOrValue(jsonValuePos, inputArgument, makeOnErrorHandler(errorMessage))) + .Done().Ptr(); + } else { + /* + ($sqlValueResult) -> { + return $sqlValueResult; + } + */ + sqlValueResultLambda = Build<TCoLambda>(ctx, jsonValuePos) + .Args({"sqlValueResult"}) + .Body("sqlValueResult") + .Done().Ptr(); + } + + // Visit call to get the result + const auto visitResult = Build<TCoVisit>(ctx, jsonValuePos) + .Input(sqlValueExpr) + .FreeArgs() + .Add<TCoAtom>() + .Build("0") + .Add(errorLambda) + .Add<TCoAtom>() + .Build("1") + .Add(sqlValueResultLambda) + .Build() + .Done().Ptr(); + + return visitResult; + }; + + map["JsonExists"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + /* + Here we rewrite expression + 
JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] {TRUE | FALSE | UNKNOWN} ON ERROR) + into + Json2::SqlExists(Json2::Parse(<json expr>), Json2::CompilePath(<jsonpath>), <dict with variables>, <on error value>) + and its sibling + JSON_EXISTS(<json expr>, <jsonpath> [PASSING <variableExpr1> AS <variableName1>, ...] ERROR ON ERROR) + into + Json2::SqlTryExists(Json2::Parse(<json expr>), <dict with variables>, Json2::CompilePath(<jsonpath>)) + */ + TCoJsonExists jsonExists(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonExists, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonExists, ctx); + + // Json2::SqlExists(<json>, <compiled jsonpath>, [<default value>]) + // or + // Json2::SqlTryExists(<json>, <compiled jsonpath>) + const bool needThrow = !jsonExists.OnError().IsValid(); + + TString sqlExistsUdfName = "SqlExists"; + if (needThrow) { + sqlExistsUdfName = "SqlTryExists"; + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + sqlExistsUdfName = "JsonDocument" + sqlExistsUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + sqlExistsUdfName = "Json2." 
+ sqlExistsUdfName; + + TTypeAnnotationNode::TListType arguments = { + inputType, + ctx.MakeType<TResourceExprType>("JsonPath") + }; + + if (!needThrow) { + const auto boolType = ctx.MakeType<TDataExprType>(EDataSlot::Bool); + const auto optionalBoolType = ctx.MakeType<TOptionalExprType>(boolType); + arguments.push_back(optionalBoolType); + } + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + const auto jsonExistsPos = jsonExists.Pos(); + auto sqlExists = Build<TCoUdf>(ctx, jsonExistsPos) + .MethodName() + .Build(sqlExistsUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonExistsPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + if (needThrow) { + return Build<TCoApply>(ctx, jsonExistsPos) + .Callable(sqlExists) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonExists.Variables()) + .Build() + .Done().Ptr(); + } + + return Build<TCoApply>(ctx, jsonExistsPos) + .Callable(sqlExists) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonExists.Variables()) + .Add(jsonExists.OnError().Cast()) + .Build() + .Done().Ptr(); + }; + + map["JsonQuery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + /* + Here we rewrite expression + JSON_QUERY( + <json expr>, + <jsonpath> + [PASSING <variableExpr1> AS <variableName1>, ...] 
+ [{WITHOUT [ARRAY] | WITH [CONDITIONAL | UNCONDITIONAL] [ARRAY]} WRAPPER] + [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY] + [{ERROR | NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR] + ) + into something like + Json2::SqlQuery...( + Json2::Parse(<json expr>), + Json2::CompilePath(<jsonpath>), + <dict with variables>, + <do we have ERROR ON EMPTY?>, + <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON EMPTY>, + <do we have ERROR ON ERROR?>, + <default value depending on {NULL | EMPTY ARRAY | EMPTY OBJECT} ON ERROR> + ) + Exact UDF name is chosen depending on wrap config: + - WITHOUT [ARRAY] WRAPPER -> Json2::SqlQuery + - WITH [UNCONDITIONAL] [ARRAY] WRAPPER -> Json2::SqlQueryWrap + - WITH CONDITIONAL [ARRAY] WRAPPER -> Json2::SqlQueryConditionalWrap + */ + TCoJsonQuery jsonQuery(node); + + // <json expr> or Json2::Parse(<json expr>) + EDataSlot jsonDataSlot; + TExprNode::TPtr parseJsonExpr = GetJsonDocumentOrParseJson(jsonQuery, ctx, jsonDataSlot); + + // Json2::CompilePath(<jsonPath>) + TExprNode::TPtr compilePathExpr = BuildJsonCompilePath(jsonQuery, ctx); + + // Json2::SqlQuery...(<json expr>, <jsonpath>, ...) 
+ const auto wrapMode = FromString<EJsonQueryWrap>(jsonQuery.WrapMode().Ref().Content()); + TString sqlQueryUdfName = "SqlQuery"; + switch (wrapMode) { + case EJsonQueryWrap::NoWrap: + sqlQueryUdfName = "SqlQuery"; + break; + case EJsonQueryWrap::Wrap: + sqlQueryUdfName = "SqlQueryWrap"; + break; + case EJsonQueryWrap::ConditionalWrap: + sqlQueryUdfName = "SqlQueryConditionalWrap"; + break; + } + + const TTypeAnnotationNode* inputType = nullptr; + if (jsonDataSlot == EDataSlot::JsonDocument) { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(EDataSlot::JsonDocument)); + sqlQueryUdfName = "JsonDocument" + sqlQueryUdfName; + } else { + inputType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + } + inputType = ctx.MakeType<TOptionalExprType>(inputType); + sqlQueryUdfName = "Json2." + sqlQueryUdfName; + + const auto optionalJsonResourceType = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")); + TTypeAnnotationNode::TListType arguments{ + inputType, + ctx.MakeType<TResourceExprType>("JsonPath"), + ctx.MakeType<TDataExprType>(EDataSlot::Bool), + optionalJsonResourceType, + ctx.MakeType<TDataExprType>(EDataSlot::Bool), + optionalJsonResourceType, + }; + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TTupleExprType>(arguments), + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + auto buildShouldThrow = [&](EJsonQueryHandler handler, TPositionHandle pos) { + return Build<TCoBool>(ctx, pos) + .Literal() + .Build(handler == EJsonQueryHandler::Error ? 
"true" : "false") + .Done().Ptr(); + }; + + auto buildHandler = [&](EJsonQueryHandler handler, TPositionHandle pos) { + switch (handler) { + case EJsonQueryHandler::Error: + case EJsonQueryHandler::Null: { + // Nothing(Resource<JsonNode>) + return Build<TCoNothing>(ctx, pos) + .OptionalType(ExpandType(pos, *optionalJsonResourceType, ctx)) + .Done().Ptr(); + } + case EJsonQueryHandler::EmptyArray: { + auto value = Build<TCoJson>(ctx, pos) + .Literal() + .Build("[]") + .Done().Ptr(); + return BuildJsonParse(value, ctx); + } + case EJsonQueryHandler::EmptyObject: { + auto value = Build<TCoJson>(ctx, pos) + .Literal() + .Build("{}") + .Done().Ptr(); + return BuildJsonParse(value, ctx); + } + } + }; + + const auto jsonQueryPos = jsonQuery.Pos(); + auto sqlQuery = Build<TCoUdf>(ctx, jsonQueryPos) + .MethodName() + .Build(sqlQueryUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(jsonQueryPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + const auto onEmpty = FromString<EJsonQueryHandler>(jsonQuery.OnEmpty().Ref().Content()); + const auto onError = FromString<EJsonQueryHandler>(jsonQuery.OnError().Ref().Content()); + const auto onEmptyPos = jsonQuery.OnEmpty().Pos(); + const auto onErrorPos = jsonQuery.OnError().Pos(); + + auto sqlQueryApply = Build<TCoApply>(ctx, jsonQueryPos) + .Callable(sqlQuery) + .FreeArgs() + .Add(parseJsonExpr) + .Add(compilePathExpr) + .Add(jsonQuery.Variables()) + .Add(buildShouldThrow(onEmpty, onEmptyPos)) + .Add(buildHandler(onEmpty, onEmptyPos)) + .Add(buildShouldThrow(onError, onErrorPos)) + .Add(buildHandler(onError, onErrorPos)) + .Build() + .Done().Ptr(); + + // In this case we need to serialize Resource<JsonNode> to Json type + if (!optCtx.Types->JsonQueryReturnsJsonDocument) { + return BuildJsonSerialize(sqlQueryApply, ctx); + } + + // Now we need to serialize Resource<JsonNode> from sqlQueryApply to JsonDocument + { + auto resourcePos = sqlQueryApply->Pos(); + + auto argumentsType = 
ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + ctx.MakeType<TOptionalExprType>(ctx.MakeType<TResourceExprType>("JsonNode")), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + TStringBuf serializeUdfName = "Json2.Serialize"; + if (optCtx.Types->JsonQueryReturnsJsonDocument) { + serializeUdfName = "Json2.SerializeToJsonDocument"; + } + auto parse = Build<TCoUdf>(ctx, resourcePos) + .MethodName() + .Build(serializeUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(resourcePos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + return Build<TCoApply>(ctx, resourcePos) + .Callable(parse) + .FreeArgs() + .Add(sqlQueryApply) + .Build() + .Done().Ptr(); + } + }; + + map["JsonVariables"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + /* + Here we rewrite expression + PASSING + <expr1> as <name1>, + <expr2> as <name2>, + ... + Into something like: + AsDict( + '( <-- tuple creation + <name1>, + Json2::...AsJsonNode(<expr1>) <-- exact name depends on the <expr1> type + ), + '( + <name2>, + Json2::...AsJsonNode(<expr2>) + ), + .... + ) + If <expr> is NULL, it is replaced with Nothing(Utf8?). + If <expr> is not Optional, it is wrapped in Just call. 
+ */ + TCoJsonVariables jsonVariables(node); + const auto pos = jsonVariables.Pos(); + + TVector<TExprNode::TPtr> children; + for (const auto& tuple : jsonVariables) { + TExprNode::TPtr name = tuple.Name().Ptr(); + const auto nameUtf8 = Build<TCoUtf8>(ctx, name->Pos()) + .Literal(name) + .Done().Ptr(); + + TExprNode::TPtr payload = tuple.Value().Cast().Ptr(); + auto argumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + payload->GetTypeAnn(), + }); + + auto udfArgumentsType = ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ + argumentsType, + ctx.MakeType<TStructExprType>(TVector<const TItemExprType*>{}), + ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{}) + }); + + EDataSlot payloadSlot; + const auto* payloadType = payload->GetTypeAnn(); + if (payloadType->GetKind() == ETypeAnnotationKind::Null) { + // we treat NULL as Nothing(Utf8?) + payloadSlot = EDataSlot::Utf8; + const auto* optionalUtf8 = ctx.MakeType<TOptionalExprType>(ctx.MakeType<TDataExprType>(payloadSlot)); + payload = Build<TCoNothing>(ctx, pos) + .OptionalType(ExpandType(pos, *optionalUtf8, ctx)) + .Done().Ptr(); + } else if (payloadType->GetKind() == ETypeAnnotationKind::Optional) { + payloadSlot = payloadType->Cast<TOptionalExprType>()->GetItemType()->Cast<TDataExprType>()->GetSlot(); + } else { + payloadSlot = payloadType->Cast<TDataExprType>()->GetSlot(); + payload = Build<TCoJust>(ctx, pos) + .Input(payload) + .Done().Ptr(); + } + + TStringBuf convertUdfName; + if (IsDataTypeNumeric(payloadSlot) || IsDataTypeDate(payloadSlot)) { + payload = Build<TCoSafeCast>(ctx, pos) + .Value(payload) + .Type(ExpandType(payload->Pos(), *ctx.MakeType<TDataExprType>(EDataSlot::Double), ctx)) + .Done().Ptr(); + convertUdfName = "Json2.DoubleAsJsonNode"; + } else if (payloadSlot == EDataSlot::Utf8) { + convertUdfName = "Json2.Utf8AsJsonNode"; + } else if (payloadSlot == EDataSlot::Bool) { + convertUdfName = "Json2.BoolAsJsonNode"; + } else if (payloadSlot == 
EDataSlot::Json) { + convertUdfName = "Json2.JsonAsJsonNode"; + } else { + YQL_ENSURE(false, "Unsupported type"); + } + + auto payloadPos = payload->Pos(); + auto convert = Build<TCoUdf>(ctx, payloadPos) + .MethodName() + .Build(convertUdfName) + .RunConfigValue<TCoVoid>() + .Build() + .UserType(ExpandType(payloadPos, *udfArgumentsType, ctx)) + .Done().Ptr(); + + auto applyConvert = Build<TCoApply>(ctx, payloadPos) + .Callable(convert) + .FreeArgs() + .Add(payload) + .Build() + .Done().Ptr(); + + auto pair = ctx.NewList(tuple.Pos(), {nameUtf8, applyConvert}); + children.push_back(pair); + } + + return Build<TCoAsDict>(ctx, pos) + .FreeArgs() + .Add(children) + .Build() + .Done().Ptr(); + }; map["CalcOverWindow"] = map["CalcOverSessionWindow"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { TCoCalcOverWindowBase self(node); diff --git a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json index edfcaa1530..29d8d3ce44 100644 --- a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json +++ b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json @@ -946,11 +946,11 @@ "Match": {"Type": "Callable", "Name": "AsStruct"} }, { - "Name": "TCoAsDict", - "Base": "TFreeArgCallable", - "Match": {"Type": "Callable", "Name": "AsDict"} - }, - { + "Name": "TCoAsDict", + "Base": "TFreeArgCallable", + "Match": {"Type": "Callable", "Name": "AsDict"} + }, + { "Name": "TCoToDict", "Base": "TCallable", "Match": {"Type": "Callable", "Name": "ToDict"}, @@ -1234,12 +1234,12 @@ "Match": {"Type": "Callable", "Name": "Udf"}, "Children": [ {"Index": 0, "Name": "MethodName", "Type": "TCoAtom"}, - {"Index": 1, "Name": "RunConfigValue", "Type": "TExprBase", "Optional": true}, - {"Index": 2, "Name": "UserType", "Type": "TExprBase", "Optional": true}, - {"Index": 3, "Name": "TypeConfig", "Type": "TCoAtom", "Optional": true}, - {"Index": 4, "Name": "CachedCallableType", "Type": "TExprBase", "Optional": true}, - 
{"Index": 5, "Name": "CachedRunConfigType", "Type": "TExprBase", "Optional": true}, - {"Index": 6, "Name": "FileAlias", "Type": "TCoAtom", "Optional": true} + {"Index": 1, "Name": "RunConfigValue", "Type": "TExprBase", "Optional": true}, + {"Index": 2, "Name": "UserType", "Type": "TExprBase", "Optional": true}, + {"Index": 3, "Name": "TypeConfig", "Type": "TCoAtom", "Optional": true}, + {"Index": 4, "Name": "CachedCallableType", "Type": "TExprBase", "Optional": true}, + {"Index": 5, "Name": "CachedRunConfigType", "Type": "TExprBase", "Optional": true}, + {"Index": 6, "Name": "FileAlias", "Type": "TCoAtom", "Optional": true} ] }, { @@ -1785,7 +1785,7 @@ "Match": {"Type": "Callable", "Name": "SafeCast"}, "Children": [ {"Index": 0, "Name": "Value", "Type": "TExprBase"}, - {"Index": 1, "Name": "Type", "Type": "TExprBase"} + {"Index": 1, "Name": "Type", "Type": "TExprBase"} ] }, { @@ -1896,62 +1896,62 @@ "Children": [ {"Index": 0, "Name": "Name", "Type": "TCoAtom"} ] - }, - { - "Name": "TCoEnsure", - "Base": "TCallable", - "Match": {"Type": "Callable", "Name": "Ensure"}, - "Children": [ - {"Index": 0, "Name": "Value", "Type": "TExprBase"}, - {"Index": 1, "Name": "Predicate", "Type": "TExprBase"}, - {"Index": 2, "Name": "Message", "Type": "TExprBase", "Optional": true} - ] - }, - { - "Name": "TCoJsonVariables", - "VarArgBase": "TCoNameValueTuple", - "Match": {"Type": "Callable", "Name": "JsonVariables"} - }, - { - "Name": "TCoJsonQueryBase", - "Base": "TCallable", - "Match": {"Type": "CallableBase"}, - "Builder": {"Generate": "None"}, - "Children": [ - {"Index": 0, "Name": "Json", "Type": "TExprBase"}, - {"Index": 1, "Name": "JsonPath", "Type": "TExprBase"}, - {"Index": 2, "Name": "Variables", "Type": "TExprBase"} - ] - }, - { - "Name": "TCoJsonValue", - "Base": "TCoJsonQueryBase", - "Match": {"Type": "Callable", "Name": "JsonValue"}, - "Children": [ - {"Index": 3, "Name": "OnEmptyMode", "Type": "TCoAtom"}, - {"Index": 4, "Name": "OnEmpty", "Type": "TExprBase"}, - 
{"Index": 5, "Name": "OnErrorMode", "Type": "TCoAtom"}, - {"Index": 6, "Name": "OnError", "Type": "TExprBase"}, - {"Index": 7, "Name": "ReturningType", "Type": "TExprBase", "Optional": true} - ] - }, - { - "Name": "TCoJsonExists", - "Base": "TCoJsonQueryBase", - "Match": {"Type": "Callable", "Name": "JsonExists"}, - "Children": [ - {"Index": 3, "Name": "OnError", "Type": "TExprBase", "Optional": true} - ] - }, - { - "Name": "TCoJsonQuery", - "Base": "TCoJsonQueryBase", - "Match": {"Type": "Callable", "Name": "JsonQuery"}, - "Children": [ - {"Index": 3, "Name": "WrapMode", "Type": "TCoAtom"}, - {"Index": 4, "Name": "OnEmpty", "Type": "TCoAtom"}, - {"Index": 5, "Name": "OnError", "Type": "TCoAtom"} - ] + }, + { + "Name": "TCoEnsure", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "Ensure"}, + "Children": [ + {"Index": 0, "Name": "Value", "Type": "TExprBase"}, + {"Index": 1, "Name": "Predicate", "Type": "TExprBase"}, + {"Index": 2, "Name": "Message", "Type": "TExprBase", "Optional": true} + ] + }, + { + "Name": "TCoJsonVariables", + "VarArgBase": "TCoNameValueTuple", + "Match": {"Type": "Callable", "Name": "JsonVariables"} + }, + { + "Name": "TCoJsonQueryBase", + "Base": "TCallable", + "Match": {"Type": "CallableBase"}, + "Builder": {"Generate": "None"}, + "Children": [ + {"Index": 0, "Name": "Json", "Type": "TExprBase"}, + {"Index": 1, "Name": "JsonPath", "Type": "TExprBase"}, + {"Index": 2, "Name": "Variables", "Type": "TExprBase"} + ] + }, + { + "Name": "TCoJsonValue", + "Base": "TCoJsonQueryBase", + "Match": {"Type": "Callable", "Name": "JsonValue"}, + "Children": [ + {"Index": 3, "Name": "OnEmptyMode", "Type": "TCoAtom"}, + {"Index": 4, "Name": "OnEmpty", "Type": "TExprBase"}, + {"Index": 5, "Name": "OnErrorMode", "Type": "TCoAtom"}, + {"Index": 6, "Name": "OnError", "Type": "TExprBase"}, + {"Index": 7, "Name": "ReturningType", "Type": "TExprBase", "Optional": true} + ] + }, + { + "Name": "TCoJsonExists", + "Base": "TCoJsonQueryBase", + "Match": 
{"Type": "Callable", "Name": "JsonExists"}, + "Children": [ + {"Index": 3, "Name": "OnError", "Type": "TExprBase", "Optional": true} + ] + }, + { + "Name": "TCoJsonQuery", + "Base": "TCoJsonQueryBase", + "Match": {"Type": "Callable", "Name": "JsonQuery"}, + "Children": [ + {"Index": 3, "Name": "WrapMode", "Type": "TCoAtom"}, + {"Index": 4, "Name": "OnEmpty", "Type": "TCoAtom"}, + {"Index": 5, "Name": "OnError", "Type": "TCoAtom"} + ] }, { "Name": "TCoAsRange", diff --git a/ydb/library/yql/core/issue/protos/issue_id.proto b/ydb/library/yql/core/issue/protos/issue_id.proto index a2ed91d640..cc1ad93387 100644 --- a/ydb/library/yql/core/issue/protos/issue_id.proto +++ b/ydb/library/yql/core/issue/protos/issue_id.proto @@ -129,7 +129,7 @@ message TIssuesIds { YQL_UNUSED_SYMBOL = 4527; YQL_MIXED_TZ = 4528; YQL_OPERATION_WILL_RETURN_NULL = 4529; - YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED = 4530; + YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED = 4530; YQL_DEPRECATED_LIST_FLATMAP_OPTIONAL = 4531; YQL_PROJECTION_ALIAS_IS_REFERENCED_IN_GROUP_BY = 4532; YQL_TABLE_BINDING_DUPLICATE = 4533; @@ -137,32 +137,32 @@ message TIssuesIds { // yql parser errors YQL_NOT_ALLOWED_IN_DISCOVERY = 4600; -// jsonpath errors - JSONPATH_PARSE_ERROR = 4700; - JSONPATH_EXPECTED_OBJECT = 4701; - JSONPATH_MEMBER_NOT_FOUND = 4702; - JSONPATH_EXPECTED_ARRAY = 4703; - JSONPATH_INVALID_ARRAY_INDEX = 4704; - JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS = 4705; - JSONPATH_INVALID_ARRAY_INDEX_RANGE = 4706; - JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT = 4707; - JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE = 4708; - JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT = 4709; - JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE = 4710; - JSONPATH_UNDEFINED_VARIABLE = 4711; - JSONPATH_DIVISION_BY_ZERO = 4712; - JSONPATH_BINARY_OPERATION_RESULT_INFINITY = 4713; - JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT = 4714; - JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER = 4715; - JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT = 4716; - 
JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT = 4717; - JSONPATH_INVALID_NUMBER_STRING = 4718; - JSONPATH_INFINITE_NUMBER_STRING = 4719; - JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT = 4720; - JSONPATH_TYPE_CHECK_ERROR = 4721; - JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT = 4722; - JSONPATH_INVALID_STARTS_WITH_ARGUMENT = 4723; - +// jsonpath errors + JSONPATH_PARSE_ERROR = 4700; + JSONPATH_EXPECTED_OBJECT = 4701; + JSONPATH_MEMBER_NOT_FOUND = 4702; + JSONPATH_EXPECTED_ARRAY = 4703; + JSONPATH_INVALID_ARRAY_INDEX = 4704; + JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS = 4705; + JSONPATH_INVALID_ARRAY_INDEX_RANGE = 4706; + JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT = 4707; + JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE = 4708; + JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT = 4709; + JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE = 4710; + JSONPATH_UNDEFINED_VARIABLE = 4711; + JSONPATH_DIVISION_BY_ZERO = 4712; + JSONPATH_BINARY_OPERATION_RESULT_INFINITY = 4713; + JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT = 4714; + JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER = 4715; + JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT = 4716; + JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT = 4717; + JSONPATH_INVALID_NUMBER_STRING = 4718; + JSONPATH_INFINITE_NUMBER_STRING = 4719; + JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT = 4720; + JSONPATH_TYPE_CHECK_ERROR = 4721; + JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT = 4722; + JSONPATH_INVALID_STARTS_WITH_ARGUMENT = 4723; + // stat STAT_DEPRECATED_STRING_TREE = 5000; STAT_ACCESS_DENIED = 5001; diff --git a/ydb/library/yql/core/issue/yql_issue.txt b/ydb/library/yql/core/issue/yql_issue.txt index 897d5d6f9a..c608e6b77d 100644 --- a/ydb/library/yql/core/issue/yql_issue.txt +++ b/ydb/library/yql/core/issue/yql_issue.txt @@ -444,103 +444,103 @@ ids { code: YQL_NOT_ALLOWED_IN_DISCOVERY severity: S_ERROR } -ids { - code: JSONPATH_PARSE_ERROR - severity: S_ERROR -} -ids { - code: JSONPATH_EXPECTED_OBJECT - severity: S_ERROR -} -ids { - code: JSONPATH_MEMBER_NOT_FOUND - severity: S_ERROR -} -ids { - 
code: JSONPATH_EXPECTED_ARRAY - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_ARRAY_INDEX - severity: S_ERROR -} -ids { - code: JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_ARRAY_INDEX_RANGE - severity: S_ERROR -} -ids { - code: JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE - severity: S_ERROR -} -ids { - code: JSONPATH_UNDEFINED_VARIABLE - severity: S_ERROR -} -ids { - code: JSONPATH_DIVISION_BY_ZERO - severity: S_ERROR -} -ids { - code: JSONPATH_BINARY_OPERATION_RESULT_INFINITY - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_NUMBER_STRING - severity: S_ERROR -} -ids { - code: JSONPATH_INFINITE_NUMBER_STRING - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_TYPE_CHECK_ERROR - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT - severity: S_ERROR -} -ids { - code: JSONPATH_INVALID_STARTS_WITH_ARGUMENT - severity: S_ERROR -} -ids { +ids { + code: JSONPATH_PARSE_ERROR + severity: S_ERROR +} +ids { + code: JSONPATH_EXPECTED_OBJECT + severity: S_ERROR +} +ids { + code: JSONPATH_MEMBER_NOT_FOUND + severity: S_ERROR +} +ids { + code: JSONPATH_EXPECTED_ARRAY + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_ARRAY_INDEX + severity: S_ERROR +} +ids { + code: JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS + severity: S_ERROR +} +ids { + code: 
JSONPATH_INVALID_ARRAY_INDEX_RANGE + severity: S_ERROR +} +ids { + code: JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE + severity: S_ERROR +} +ids { + code: JSONPATH_UNDEFINED_VARIABLE + severity: S_ERROR +} +ids { + code: JSONPATH_DIVISION_BY_ZERO + severity: S_ERROR +} +ids { + code: JSONPATH_BINARY_OPERATION_RESULT_INFINITY + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_NUMBER_STRING + severity: S_ERROR +} +ids { + code: JSONPATH_INFINITE_NUMBER_STRING + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_TYPE_CHECK_ERROR + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT + severity: S_ERROR +} +ids { + code: JSONPATH_INVALID_STARTS_WITH_ARGUMENT + severity: S_ERROR +} +ids { code: YQL_EMPTY_WINDOW_FRAME severity: S_WARNING } @@ -580,10 +580,10 @@ ids { code: YQL_OPERATION_WILL_RETURN_NULL severity: S_WARNING } -ids { - code: YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED - severity: S_WARNING -} +ids { + code: YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED + severity: S_WARNING +} ids { code: YQL_DEPRECATED_LIST_FLATMAP_OPTIONAL severity: S_WARNING diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 5846e6cb10..b2453e744c 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -25,7 
+25,7 @@ #include <ydb/library/yql/minikql/mkql_program_builder.h> #include <ydb/library/yql/minikql/mkql_type_ops.h> -#include <util/generic/serialized_enum.h> +#include <util/generic/serialized_enum.h> #include <util/generic/singleton.h> #include <util/generic/strbuf.h> #include <util/generic/maybe.h> @@ -337,17 +337,17 @@ namespace NTypeAnnImpl { .Build(), ctx.MakeType<TDataExprType>(EDataSlot::TzTimestamp) }; } - if (resType->GetTag() == "JsonNode") { - return { ctx.Builder(input->Pos()) - .Callable("Apply") - .Callable(0, "Udf") + if (resType->GetTag() == "JsonNode") { + return { ctx.Builder(input->Pos()) + .Callable("Apply") + .Callable(0, "Udf") .Atom(0, "Json2.Serialize", TNodeFlags::Default) - .Seal() - .Add(1, input) - .Seal() - .Build(), ctx.MakeType<TDataExprType>(EDataSlot::Json) }; - } - + .Seal() + .Add(1, input) + .Seal() + .Build(), ctx.MakeType<TDataExprType>(EDataSlot::Json) }; + } + return { nullptr, nullptr }; } @@ -457,60 +457,60 @@ namespace NTypeAnnImpl { } } - bool EnsureJsonQueryFunction(const NNodes::TCoJsonQueryBase& function, TContext& ctx) { - // first argument must be "Json", "Json?", "JsonDocument" or "JsonDocument?" 
type - const auto& jsonArg = function.Json().Ref(); - bool isOptional; - const TDataExprType* dataType; - if (!EnsureDataOrOptionalOfData(jsonArg, isOptional, dataType, ctx.Expr)) { - return false; - } - - if (dataType->GetSlot() != EDataSlot::Json && dataType->GetSlot() != EDataSlot::JsonDocument) { - ctx.Expr.AddError(TIssue( - ctx.Expr.GetPosition(jsonArg.Pos()), - TStringBuilder() << "Expected Json, Json?, JsonDocument or JsonDocument?, but got: " << *jsonArg.GetTypeAnn() - )); - return false; - } - - // second argument must be "Utf8" type - const auto& jsonPathArg = function.JsonPath().Ref(); - if (!EnsureSpecificDataType(jsonPathArg, EDataSlot::Utf8, ctx.Expr)) { - return false; - } - - // third argument must be "Dict" type - const auto& variablesArg = function.Variables().Ref(); - if (!variablesArg.GetTypeAnn()) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(variablesArg.Pos()), "Expected dict, but got lambda")); - return false; - } - - if (variablesArg.GetTypeAnn()->GetKind() == ETypeAnnotationKind::EmptyDict) { - return true; - } - - if (!EnsureDictType(variablesArg, ctx.Expr)) { - return false; - } - - const TDictExprType* dictType = variablesArg.GetTypeAnn()->Cast<TDictExprType>(); - - if (!EnsureSpecificDataType(variablesArg.Pos(), *dictType->GetKeyType(), EDataSlot::Utf8, ctx.Expr)) { - return false; - } - - const auto* payloadType = dictType->GetPayloadType(); - if (payloadType->GetKind() != ETypeAnnotationKind::Resource - || payloadType->Cast<TResourceExprType>()->GetTag() != "JsonNode") { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(variablesArg.Pos()), TStringBuilder() << "Dict payload type must be Resource<'JsonNode'>, not " << *payloadType)); - return false; - } - - return true; - } - + bool EnsureJsonQueryFunction(const NNodes::TCoJsonQueryBase& function, TContext& ctx) { + // first argument must be "Json", "Json?", "JsonDocument" or "JsonDocument?" 
type + const auto& jsonArg = function.Json().Ref(); + bool isOptional; + const TDataExprType* dataType; + if (!EnsureDataOrOptionalOfData(jsonArg, isOptional, dataType, ctx.Expr)) { + return false; + } + + if (dataType->GetSlot() != EDataSlot::Json && dataType->GetSlot() != EDataSlot::JsonDocument) { + ctx.Expr.AddError(TIssue( + ctx.Expr.GetPosition(jsonArg.Pos()), + TStringBuilder() << "Expected Json, Json?, JsonDocument or JsonDocument?, but got: " << *jsonArg.GetTypeAnn() + )); + return false; + } + + // second argument must be "Utf8" type + const auto& jsonPathArg = function.JsonPath().Ref(); + if (!EnsureSpecificDataType(jsonPathArg, EDataSlot::Utf8, ctx.Expr)) { + return false; + } + + // third argument must be "Dict" type + const auto& variablesArg = function.Variables().Ref(); + if (!variablesArg.GetTypeAnn()) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(variablesArg.Pos()), "Expected dict, but got lambda")); + return false; + } + + if (variablesArg.GetTypeAnn()->GetKind() == ETypeAnnotationKind::EmptyDict) { + return true; + } + + if (!EnsureDictType(variablesArg, ctx.Expr)) { + return false; + } + + const TDictExprType* dictType = variablesArg.GetTypeAnn()->Cast<TDictExprType>(); + + if (!EnsureSpecificDataType(variablesArg.Pos(), *dictType->GetKeyType(), EDataSlot::Utf8, ctx.Expr)) { + return false; + } + + const auto* payloadType = dictType->GetPayloadType(); + if (payloadType->GetKind() != ETypeAnnotationKind::Resource + || payloadType->Cast<TResourceExprType>()->GetTag() != "JsonNode") { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(variablesArg.Pos()), TStringBuilder() << "Dict payload type must be Resource<'JsonNode'>, not " << *payloadType)); + return false; + } + + return true; + } + typedef std::function<IGraphTransformer::TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TContext& ctx)> TAnnotationFunc; @@ -705,8 +705,8 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Error; } - } else if (input->Content() == 
"JsonDocument") { - // check will be performed in JsonDocument callable + } else if (input->Content() == "JsonDocument") { + // check will be performed in JsonDocument callable } else if (input->Content() == "DyNumber") { if (!NKikimr::NMiniKQL::IsValidStringValue(EDataSlot::DyNumber, input->Head().Content())) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "Bad atom format for type: " @@ -11867,241 +11867,241 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } - IGraphTransformer::TStatus JsonValueWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - Y_UNUSED(output); - - using NNodes::TCoJsonValue; - if (!EnsureMinArgsCount(*input, 7, ctx.Expr) - || !EnsureMaxArgsCount(*input, 8, ctx.Expr) - || !EnsureAtom(*input->Child(TCoJsonValue::idx_OnEmptyMode), ctx.Expr) - || !EnsureAtom(*input->Child(TCoJsonValue::idx_OnErrorMode), ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - + IGraphTransformer::TStatus JsonValueWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + + using NNodes::TCoJsonValue; + if (!EnsureMinArgsCount(*input, 7, ctx.Expr) + || !EnsureMaxArgsCount(*input, 8, ctx.Expr) + || !EnsureAtom(*input->Child(TCoJsonValue::idx_OnEmptyMode), ctx.Expr) + || !EnsureAtom(*input->Child(TCoJsonValue::idx_OnErrorMode), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + if (TCoJsonValue::idx_ReturningType < input->ChildrenSize()) { auto status = EnsureTypeRewrite(input->ChildRef(TCoJsonValue::idx_ReturningType), ctx.Expr); if (status != IGraphTransformer::TStatus::Ok) { return status; } - } - - TCoJsonValue jsonValue(input); - - // check first 3 common arguments - if (!EnsureJsonQueryFunction(jsonValue, ctx)) { - return IGraphTransformer::TStatus::Error; - } - - // default return value type is "Utf8?" 
- EDataSlot resultSlot = EDataSlot::Utf8; - - // check if user provided custom return value type - const auto& returningTypeArg = jsonValue.ReturningType(); - if (returningTypeArg) { - const auto* returningTypeAnn = returningTypeArg.Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType(); - if (!EnsureDataType(returningTypeArg.Ref().Pos(), *returningTypeAnn, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - resultSlot = returningTypeAnn->Cast<TDataExprType>()->GetSlot(); - - if (!IsDataTypeNumeric(resultSlot) - && !IsDataTypeDate(resultSlot) - && resultSlot != EDataSlot::Utf8 - && resultSlot != EDataSlot::String - && resultSlot != EDataSlot::Bool) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Returning argument of JsonValue callable supports only Utf8, String, Bool, date and numeric types")); - return IGraphTransformer::TStatus::Error; - } - } - - // ON ERROR and ON EMPTY values must be castable to resultSlot or "Null" - auto isValidCaseHandler = [&] (const TExprNode& node) { - const auto* typeAnn = node.GetTypeAnn(); - if (!typeAnn) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(node.Pos()), "Expected computable value, but got lambda")); - return false; - } - - if (IsNull(node)) { - return true; - } - - bool isOptional; - const TDataExprType* dataType; - if (!EnsureDataOrOptionalOfData(node, isOptional, dataType, ctx.Expr)) { - return false; - } - - const auto handlerSlot = dataType->GetSlot(); - const auto castResult = GetCastResult(handlerSlot, resultSlot); - if (!castResult.Defined() || *castResult == NUdf::ECastOptions::Impossible) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(node.Pos()), - TStringBuilder() << "Cannot cast type of case handler " << handlerSlot << " to the returning type of JSON_VALUE " << resultSlot)); - return false; - } - - return true; - }; - - if (!isValidCaseHandler(jsonValue.OnEmpty().Ref()) || !isValidCaseHandler(jsonValue.OnError().Ref())) { - return IGraphTransformer::TStatus::Error; - } - - // 
make returning type optional - const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(resultSlot); - input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); - return IGraphTransformer::TStatus::Ok; - } - - IGraphTransformer::TStatus JsonExistsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - Y_UNUSED(output); - - if (!EnsureMinArgsCount(*input, 3, ctx.Expr) || !EnsureMaxArgsCount(*input, 4, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - NNodes::TCoJsonExists jsonExists(input); - - // check first 3 common arguments - if (!EnsureJsonQueryFunction(jsonExists, ctx)) { - return IGraphTransformer::TStatus::Error; - } - - // onError argument if present must be "Bool?" type - if (jsonExists.OnError()) { - const auto& onErrorArg = jsonExists.OnError().Ref(); - - if (!EnsureOptionalType(onErrorArg, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - const auto optionalTypeAnn = onErrorArg.GetTypeAnn(); - if (!optionalTypeAnn) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(onErrorArg.Pos()), "Expected optional Bool, but got lambda")); - return IGraphTransformer::TStatus::Error; - } - - const auto underlyingType = optionalTypeAnn->Cast<TOptionalExprType>()->GetItemType(); - if (!EnsureSpecificDataType(onErrorArg.Pos(), *underlyingType, EDataSlot::Bool, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - } - - // make returning type optional - const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool); - input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); - return IGraphTransformer::TStatus::Ok; - } - - IGraphTransformer::TStatus JsonQueryWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { - Y_UNUSED(output); - - if (!EnsureArgsCount(*input, 6, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - using NNodes::TCoJsonQuery; - if 
(!EnsureAtom(*input->Child(TCoJsonQuery::idx_WrapMode), ctx.Expr) - || !EnsureAtom(*input->Child(TCoJsonQuery::idx_OnEmpty), ctx.Expr) - || !EnsureAtom(*input->Child(TCoJsonQuery::idx_OnError), ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - TCoJsonQuery jsonQuery(input); - - // check first 3 common arguments - if (!EnsureJsonQueryFunction(jsonQuery, ctx)) { - return IGraphTransformer::TStatus::Error; - } - - const auto& wrapModeArg = jsonQuery.WrapMode().Ref(); - EJsonQueryWrap wrapMode; - if (!TryFromString(wrapModeArg.Content(), wrapMode)) { - ctx.Expr.AddError(TIssue( - ctx.Expr.GetPosition(input->Pos()), - TStringBuilder() << "Invalid value for WrapMode argument. Available options are: " << GetEnumAllNames<EJsonQueryWrap>() - )); - return IGraphTransformer::TStatus::Error; - } - - const auto& onEmptyArg = jsonQuery.OnEmpty().Ref(); - EJsonQueryHandler onEmpty; - if (!TryFromString(onEmptyArg.Content(), onEmpty)) { - ctx.Expr.AddError(TIssue( - ctx.Expr.GetPosition(input->Pos()), - TStringBuilder() << "Invalid value for OnEmpty argument. Available options are: " << GetEnumAllNames<EJsonQueryHandler>() - )); - return IGraphTransformer::TStatus::Error; - } - - const auto& onErrorArg = jsonQuery.OnError().Ref(); - EJsonQueryHandler onError; - if (!TryFromString(onErrorArg.Content(), onError)) { - ctx.Expr.AddError(TIssue( - ctx.Expr.GetPosition(input->Pos()), - TStringBuilder() << "Invalid value for OnError argument. Available options are: " << GetEnumAllNames<EJsonQueryHandler>() - )); - return IGraphTransformer::TStatus::Error; - } - - // make returning type optional - EDataSlot returnType = EDataSlot::JsonDocument; - if (!ctx.Types.JsonQueryReturnsJsonDocument) { - auto issue = TIssue( - ctx.Expr.GetPosition(input->Pos()), - "JSON_QUERY returning Json type is deprecated. Please use PRAGMA JsonQueryReturnsJsonDocument; to " - "make JSON_QUERY return JsonDocument type. 
It will be turned on by default soon" - ); - SetIssueCode(EYqlIssueCode::TIssuesIds_EIssueCode_YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED, issue); - if (!ctx.Expr.AddWarning(issue)) { - return IGraphTransformer::TStatus::Error; - } - returnType = EDataSlot::Json; - } - const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(returnType); - input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); - return IGraphTransformer::TStatus::Ok; - } - - IGraphTransformer::TStatus JsonVariablesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - Y_UNUSED(output); - for (size_t i = 0; i < input->ChildrenSize(); i++) { - const auto& tuple = input->Child(i); - - using NNodes::TCoNameValueTuple; - if (!EnsureTuple(*tuple, ctx.Expr) || !EnsureTupleSize(*tuple, 2, ctx.Expr) || !EnsureAtom(*tuple->Child(TCoNameValueTuple::idx_Name), ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - TCoNameValueTuple nameValueTuple(tuple); - const auto& variableValue = nameValueTuple.Value().Ref(); - if (IsNull(variableValue)) { - continue; - } - - bool isOptional; - const TDataExprType* valueType; - if (!EnsureDataOrOptionalOfData(variableValue, isOptional, valueType, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - - const auto valueSlot = valueType->GetSlot(); - if (!IsDataTypeNumeric(valueSlot) - && !IsDataTypeDate(valueSlot) - && valueSlot != EDataSlot::Utf8 - && valueSlot != EDataSlot::Bool - && valueSlot != EDataSlot::Json) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "You can pass only values of Utf8, Bool, Json, date and numeric types for jsonpath variables")); - return IGraphTransformer::TStatus::Error; - } - } - - const auto* keyType = ctx.Expr.MakeType<TDataExprType>(EDataSlot::Utf8); - const auto* payloadType = ctx.Expr.MakeType<TResourceExprType>("JsonNode"); - input->SetTypeAnn(ctx.Expr.MakeType<TDictExprType>(keyType, payloadType)); - return IGraphTransformer::TStatus::Ok; - } - 
+ } + + TCoJsonValue jsonValue(input); + + // check first 3 common arguments + if (!EnsureJsonQueryFunction(jsonValue, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + // default return value type is "Utf8?" + EDataSlot resultSlot = EDataSlot::Utf8; + + // check if user provided custom return value type + const auto& returningTypeArg = jsonValue.ReturningType(); + if (returningTypeArg) { + const auto* returningTypeAnn = returningTypeArg.Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType(); + if (!EnsureDataType(returningTypeArg.Ref().Pos(), *returningTypeAnn, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + resultSlot = returningTypeAnn->Cast<TDataExprType>()->GetSlot(); + + if (!IsDataTypeNumeric(resultSlot) + && !IsDataTypeDate(resultSlot) + && resultSlot != EDataSlot::Utf8 + && resultSlot != EDataSlot::String + && resultSlot != EDataSlot::Bool) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Returning argument of JsonValue callable supports only Utf8, String, Bool, date and numeric types")); + return IGraphTransformer::TStatus::Error; + } + } + + // ON ERROR and ON EMPTY values must be castable to resultSlot or "Null" + auto isValidCaseHandler = [&] (const TExprNode& node) { + const auto* typeAnn = node.GetTypeAnn(); + if (!typeAnn) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(node.Pos()), "Expected computable value, but got lambda")); + return false; + } + + if (IsNull(node)) { + return true; + } + + bool isOptional; + const TDataExprType* dataType; + if (!EnsureDataOrOptionalOfData(node, isOptional, dataType, ctx.Expr)) { + return false; + } + + const auto handlerSlot = dataType->GetSlot(); + const auto castResult = GetCastResult(handlerSlot, resultSlot); + if (!castResult.Defined() || *castResult == NUdf::ECastOptions::Impossible) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(node.Pos()), + TStringBuilder() << "Cannot cast type of case handler " << handlerSlot << " to the returning type of JSON_VALUE " << 
resultSlot)); + return false; + } + + return true; + }; + + if (!isValidCaseHandler(jsonValue.OnEmpty().Ref()) || !isValidCaseHandler(jsonValue.OnError().Ref())) { + return IGraphTransformer::TStatus::Error; + } + + // make returning type optional + const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(resultSlot); + input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); + return IGraphTransformer::TStatus::Ok; + } + + IGraphTransformer::TStatus JsonExistsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + + if (!EnsureMinArgsCount(*input, 3, ctx.Expr) || !EnsureMaxArgsCount(*input, 4, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + NNodes::TCoJsonExists jsonExists(input); + + // check first 3 common arguments + if (!EnsureJsonQueryFunction(jsonExists, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + // onError argument if present must be "Bool?" type + if (jsonExists.OnError()) { + const auto& onErrorArg = jsonExists.OnError().Ref(); + + if (!EnsureOptionalType(onErrorArg, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const auto optionalTypeAnn = onErrorArg.GetTypeAnn(); + if (!optionalTypeAnn) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(onErrorArg.Pos()), "Expected optional Bool, but got lambda")); + return IGraphTransformer::TStatus::Error; + } + + const auto underlyingType = optionalTypeAnn->Cast<TOptionalExprType>()->GetItemType(); + if (!EnsureSpecificDataType(onErrorArg.Pos(), *underlyingType, EDataSlot::Bool, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + } + + // make returning type optional + const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool); + input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); + return IGraphTransformer::TStatus::Ok; + } + + IGraphTransformer::TStatus JsonQueryWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, 
TExtContext& ctx) { + Y_UNUSED(output); + + if (!EnsureArgsCount(*input, 6, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + using NNodes::TCoJsonQuery; + if (!EnsureAtom(*input->Child(TCoJsonQuery::idx_WrapMode), ctx.Expr) + || !EnsureAtom(*input->Child(TCoJsonQuery::idx_OnEmpty), ctx.Expr) + || !EnsureAtom(*input->Child(TCoJsonQuery::idx_OnError), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + TCoJsonQuery jsonQuery(input); + + // check first 3 common arguments + if (!EnsureJsonQueryFunction(jsonQuery, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + const auto& wrapModeArg = jsonQuery.WrapMode().Ref(); + EJsonQueryWrap wrapMode; + if (!TryFromString(wrapModeArg.Content(), wrapMode)) { + ctx.Expr.AddError(TIssue( + ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Invalid value for WrapMode argument. Available options are: " << GetEnumAllNames<EJsonQueryWrap>() + )); + return IGraphTransformer::TStatus::Error; + } + + const auto& onEmptyArg = jsonQuery.OnEmpty().Ref(); + EJsonQueryHandler onEmpty; + if (!TryFromString(onEmptyArg.Content(), onEmpty)) { + ctx.Expr.AddError(TIssue( + ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Invalid value for OnEmpty argument. Available options are: " << GetEnumAllNames<EJsonQueryHandler>() + )); + return IGraphTransformer::TStatus::Error; + } + + const auto& onErrorArg = jsonQuery.OnError().Ref(); + EJsonQueryHandler onError; + if (!TryFromString(onErrorArg.Content(), onError)) { + ctx.Expr.AddError(TIssue( + ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Invalid value for OnError argument. Available options are: " << GetEnumAllNames<EJsonQueryHandler>() + )); + return IGraphTransformer::TStatus::Error; + } + + // make returning type optional + EDataSlot returnType = EDataSlot::JsonDocument; + if (!ctx.Types.JsonQueryReturnsJsonDocument) { + auto issue = TIssue( + ctx.Expr.GetPosition(input->Pos()), + "JSON_QUERY returning Json type is deprecated. 
Please use PRAGMA JsonQueryReturnsJsonDocument; to " + "make JSON_QUERY return JsonDocument type. It will be turned on by default soon" + ); + SetIssueCode(EYqlIssueCode::TIssuesIds_EIssueCode_YQL_JSON_QUERY_RETURNING_JSON_IS_DEPRECATED, issue); + if (!ctx.Expr.AddWarning(issue)) { + return IGraphTransformer::TStatus::Error; + } + returnType = EDataSlot::Json; + } + const TTypeAnnotationNode* resultType = ctx.Expr.MakeType<TDataExprType>(returnType); + input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(resultType)); + return IGraphTransformer::TStatus::Ok; + } + + IGraphTransformer::TStatus JsonVariablesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + for (size_t i = 0; i < input->ChildrenSize(); i++) { + const auto& tuple = input->Child(i); + + using NNodes::TCoNameValueTuple; + if (!EnsureTuple(*tuple, ctx.Expr) || !EnsureTupleSize(*tuple, 2, ctx.Expr) || !EnsureAtom(*tuple->Child(TCoNameValueTuple::idx_Name), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + TCoNameValueTuple nameValueTuple(tuple); + const auto& variableValue = nameValueTuple.Value().Ref(); + if (IsNull(variableValue)) { + continue; + } + + bool isOptional; + const TDataExprType* valueType; + if (!EnsureDataOrOptionalOfData(variableValue, isOptional, valueType, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const auto valueSlot = valueType->GetSlot(); + if (!IsDataTypeNumeric(valueSlot) + && !IsDataTypeDate(valueSlot) + && valueSlot != EDataSlot::Utf8 + && valueSlot != EDataSlot::Bool + && valueSlot != EDataSlot::Json) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "You can pass only values of Utf8, Bool, Json, date and numeric types for jsonpath variables")); + return IGraphTransformer::TStatus::Error; + } + } + + const auto* keyType = ctx.Expr.MakeType<TDataExprType>(EDataSlot::Utf8); + const auto* payloadType = ctx.Expr.MakeType<TResourceExprType>("JsonNode"); + 
input->SetTypeAnn(ctx.Expr.MakeType<TDictExprType>(keyType, payloadType)); + return IGraphTransformer::TStatus::Ok; + } + bool IsValidTypeForRanges(const TTypeAnnotationNode* type) { YQL_ENSURE(type); if (type->GetKind() != ETypeAnnotationKind::Optional) { @@ -13234,10 +13234,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> ExtFunctions["CurrentAuthenticatedUser"] = &CurrentAuthenticatedUserWrapper; ExtFunctions["SecureParam"] = &SecureParamWrapper; ExtFunctions["UnsafeTimestampCast"] = &UnsafeTimestampCastWrapper; - ExtFunctions["JsonValue"] = &JsonValueWrapper; - ExtFunctions["JsonExists"] = &JsonExistsWrapper; - ExtFunctions["JsonQuery"] = &JsonQueryWrapper; - ExtFunctions["JsonVariables"] = &JsonVariablesWrapper; + ExtFunctions["JsonValue"] = &JsonValueWrapper; + ExtFunctions["JsonExists"] = &JsonExistsWrapper; + ExtFunctions["JsonQuery"] = &JsonQueryWrapper; + ExtFunctions["JsonVariables"] = &JsonVariablesWrapper; ExtFunctions["AssumeColumnOrder"] = &AssumeColumnOrderWrapper; ExtFunctions["AssumeColumnOrderPartial"] = &AssumeColumnOrderWrapper; ExtFunctions["UnionAllPositional"] = &UnionAllPositionalWrapper; diff --git a/ydb/library/yql/core/ya.make b/ydb/library/yql/core/ya.make index 29169996f5..3338658060 100644 --- a/ydb/library/yql/core/ya.make +++ b/ydb/library/yql/core/ya.make @@ -7,7 +7,7 @@ OWNER( ) SRCS( - yql_atom_enums.h + yql_atom_enums.h yql_callable_transform.cpp yql_callable_transform.h yql_csv.cpp @@ -84,7 +84,7 @@ GENERATE_ENUM_SERIALIZATION(yql_data_provider.h) GENERATE_ENUM_SERIALIZATION(yql_user_data.h) -GENERATE_ENUM_SERIALIZATION(yql_atom_enums.h) +GENERATE_ENUM_SERIALIZATION(yql_atom_enums.h) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/yql/core/yql_atom_enums.h b/ydb/library/yql/core/yql_atom_enums.h index 38a6859a33..5d9ab7598c 100644 --- a/ydb/library/yql/core/yql_atom_enums.h +++ b/ydb/library/yql/core/yql_atom_enums.h @@ -1,21 +1,21 @@ -#pragma once - -namespace NYql { - enum class EJsonQueryWrap { - NoWrap = 0, - Wrap = 1, - 
ConditionalWrap = 2, - }; - - enum class EJsonQueryHandler { - Null = 0, - Error = 1, - EmptyArray = 2, - EmptyObject = 3, - }; - - enum class EJsonValueHandlerMode { - Error = 0, - DefaultValue = 1, - }; +#pragma once + +namespace NYql { + enum class EJsonQueryWrap { + NoWrap = 0, + Wrap = 1, + ConditionalWrap = 2, + }; + + enum class EJsonQueryHandler { + Null = 0, + Error = 1, + EmptyArray = 2, + EmptyObject = 3, + }; + + enum class EJsonValueHandlerMode { + Error = 0, + DefaultValue = 1, + }; } diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp index f2b793af8d..30dbaac046 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.cpp +++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp @@ -231,30 +231,30 @@ IGraphTransformer::TStatus TryConvertToImpl(TExprContext& ctx, TExprNode::TPtr& .Build(); return IGraphTransformer::TStatus::Repeat; - } else if (fromSlot == EDataSlot::Json && to == "JsonNode") { - node = ctx.Builder(node->Pos()) - .Callable("Apply") - .Callable(0, "Udf") - .Atom(0, "Json2.Parse", TNodeFlags::Default) - .Callable(1, "Void") - .Seal() - .Callable(2, "TupleType") - .Callable(0, "TupleType") - .Callable(0, "DataType") - .Atom(0, sourceType.Cast<TDataExprType>()->GetName(), TNodeFlags::Default) - .Seal() - .Seal() - .Callable(1, "StructType") - .Seal() - .Callable(2, "TupleType") - .Seal() - .Seal() - .Seal() - .Add(1, std::move(node)) - .Seal() - .Build(); - - return IGraphTransformer::TStatus::Repeat; + } else if (fromSlot == EDataSlot::Json && to == "JsonNode") { + node = ctx.Builder(node->Pos()) + .Callable("Apply") + .Callable(0, "Udf") + .Atom(0, "Json2.Parse", TNodeFlags::Default) + .Callable(1, "Void") + .Seal() + .Callable(2, "TupleType") + .Callable(0, "TupleType") + .Callable(0, "DataType") + .Atom(0, sourceType.Cast<TDataExprType>()->GetName(), TNodeFlags::Default) + .Seal() + .Seal() + .Callable(1, "StructType") + .Seal() + .Callable(2, "TupleType") + .Seal() + 
.Seal() + .Seal() + .Add(1, std::move(node)) + .Seal() + .Build(); + + return IGraphTransformer::TStatus::Repeat; } } else if (expectedType.GetKind() == ETypeAnnotationKind::Data && sourceType.GetKind() == ETypeAnnotationKind::Data) { const auto from = sourceType.Cast<TDataExprType>()->GetSlot(); diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h index 1a1f1fb476..eb36e68e8f 100644 --- a/ydb/library/yql/core/yql_type_annotation.h +++ b/ydb/library/yql/core/yql_type_annotation.h @@ -219,7 +219,7 @@ struct TTypeAnnotationContext: public TThrRefBase { bool DqCaptured = false; // TODO: Add before/after recapture transformers TString DqFallbackPolicy = ""; bool StrictTableProps = true; - bool JsonQueryReturnsJsonDocument = false; + bool JsonQueryReturnsJsonDocument = false; ui32 FolderSubDirsLimit = 1000; // compatibility with v0 or raw s-expression code diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_frombytes.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_frombytes.cpp index 16f9109b70..a8436227eb 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_frombytes.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_frombytes.cpp @@ -7,7 +7,7 @@ #include <ydb/library/yql/utils/swap_bytes.h> #include <ydb/library/binary_json/read.h> - + #include <util/system/unaligned_mem.h> namespace NKikimr { @@ -97,14 +97,14 @@ public: return NUdf::TUnboxedValuePod(); } - - case NUdf::EDataSlot::JsonDocument: { - if (!NBinaryJson::IsValidBinaryJson(TStringBuf(data.AsStringRef()))) { - return NUdf::TUnboxedValuePod(); - } - return data.Release(); - } - + + case NUdf::EDataSlot::JsonDocument: { + if (!NBinaryJson::IsValidBinaryJson(TStringBuf(data.AsStringRef()))) { + return NUdf::TUnboxedValuePod(); + } + return data.Release(); + } + default: if (IsValidValue(SchemeType, data)) { return data.Release(); diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_tobytes.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_tobytes.cpp index 
876eef4ad6..d14336880c 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_tobytes.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_tobytes.cpp @@ -58,7 +58,7 @@ public: return NUdf::TUnboxedValuePod::Embedded(NUdf::TStringRef(buf, sizeof(buf))); } }; - + class TToBytesWrapper : public TDecoratorCodegeneratorNode<TToBytesWrapper> { using TBaseComputation = TDecoratorCodegeneratorNode<TToBytesWrapper>; public: diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_pack.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_pack.cpp index 625a269eaf..17bce8151b 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_pack.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_pack.cpp @@ -918,7 +918,7 @@ TValuePacker::TProperties TValuePacker::ScanTypeProperties(const TType* type) { case NUdf::EDataSlot::Json: case NUdf::EDataSlot::Yson: case NUdf::EDataSlot::Utf8: - case NUdf::EDataSlot::JsonDocument: + case NUdf::EDataSlot::JsonDocument: // Reuse entire packed value length for strings props.Set(EProps::UseTopLength); break; diff --git a/ydb/library/yql/minikql/dom/json.cpp b/ydb/library/yql/minikql/dom/json.cpp index 853df56cab..0021c69487 100644 --- a/ydb/library/yql/minikql/dom/json.cpp +++ b/ydb/library/yql/minikql/dom/json.cpp @@ -1,27 +1,27 @@ -#include "json.h" -#include "node.h" - +#include "json.h" +#include "node.h" + #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/json/json_reader.h> #include <library/cpp/json/json_writer.h> - -#include <util/stream/input.h> -#include <util/stream/str.h> -#include <util/generic/stack.h> -#include <util/system/yassert.h> -#include <util/system/compiler.h> - -#include <cmath> + +#include <util/stream/input.h> +#include <util/stream/str.h> +#include <util/generic/stack.h> +#include <util/system/yassert.h> +#include <util/system/compiler.h> + +#include <cmath> #include <ctype.h> - -namespace NYql::NDom { - -using namespace NUdf; -using 
namespace NJson; - -namespace { - + +namespace NYql::NDom { + +using namespace NUdf; +using namespace NJson; + +namespace { + size_t AsciiSize(const TStringBuf& str) { size_t s = 0U; while (s < str.size() && isascii(str[s])) @@ -64,113 +64,113 @@ TString DecodeUtf(const TStringBuf& str, size_t from) } template<bool DecodeUtf8> -class TDomCallbacks : public TJsonCallbacks { -public: - TDomCallbacks(const IValueBuilder* valueBuilder, bool throwException) - : TJsonCallbacks(throwException) - , ValueBuilder(valueBuilder) - { - Result.push({}); - } - - bool OnNull() override { - return PushToCurrentCollection(MakeEntity()); - } - - bool OnBoolean(bool value) override { - return PushToCurrentCollection(MakeBool(value)); - } - - bool OnInteger(long long value) override { - return PushToCurrentCollection(MakeInt64(static_cast<i64>(value))); - } - - bool OnUInteger(unsigned long long value) override { - return PushToCurrentCollection(MakeUint64(static_cast<ui64>(value))); - } - - bool OnDouble(double value) override { - if (Y_UNLIKELY(std::isinf(value))) { - ythrow yexception() << "JSON number is infinite"; - } - - return PushToCurrentCollection(MakeDouble(value)); - } - - bool OnString(const TStringBuf& value) override { +class TDomCallbacks : public TJsonCallbacks { +public: + TDomCallbacks(const IValueBuilder* valueBuilder, bool throwException) + : TJsonCallbacks(throwException) + , ValueBuilder(valueBuilder) + { + Result.push({}); + } + + bool OnNull() override { + return PushToCurrentCollection(MakeEntity()); + } + + bool OnBoolean(bool value) override { + return PushToCurrentCollection(MakeBool(value)); + } + + bool OnInteger(long long value) override { + return PushToCurrentCollection(MakeInt64(static_cast<i64>(value))); + } + + bool OnUInteger(unsigned long long value) override { + return PushToCurrentCollection(MakeUint64(static_cast<ui64>(value))); + } + + bool OnDouble(double value) override { + if (Y_UNLIKELY(std::isinf(value))) { + ythrow yexception() << "JSON 
number is infinite"; + } + + return PushToCurrentCollection(MakeDouble(value)); + } + + bool OnString(const TStringBuf& value) override { if constexpr (DecodeUtf8) { if (const auto from = AsciiSize(value); from < value.size()) { return PushToCurrentCollection(MakeString(DecodeUtf(value, from), ValueBuilder)); } } - return PushToCurrentCollection(MakeString(value, ValueBuilder)); - } - - bool OnOpenMap() override { - return OnCollectionOpen(); - } - - bool OnMapKey(const TStringBuf& value) override { - return OnString(value); - } - - bool OnCloseMap() override { + return PushToCurrentCollection(MakeString(value, ValueBuilder)); + } + + bool OnOpenMap() override { + return OnCollectionOpen(); + } + + bool OnMapKey(const TStringBuf& value) override { + return OnString(value); + } + + bool OnCloseMap() override { Y_VERIFY_DEBUG(!Result.empty()); - auto& items = Result.top(); + auto& items = Result.top(); Y_VERIFY_DEBUG(items.size() % 2 == 0); - - TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs; - for (size_t i = 0; i < items.size(); i += 2) { - pairs.emplace_back(std::move(items[i]), std::move(items[i + 1])); - } - - Result.pop(); - return PushToCurrentCollection(MakeDict(pairs.data(), pairs.size())); - } - - bool OnOpenArray() override { - return OnCollectionOpen(); - } - - bool OnCloseArray() override { - Y_VERIFY_DEBUG(!Result.empty()); - auto& items = Result.top(); - TUnboxedValue list = MakeList(items.data(), items.size(), ValueBuilder); - Result.pop(); - return PushToCurrentCollection(std::move(list)); - } - - bool OnEnd() override { - return IsResultSingle(); - } - - TUnboxedValue GetResult() && { - Y_VERIFY_DEBUG(IsResultSingle()); - return std::move(Result.top()[0]); - } - -private: - bool OnCollectionOpen() { + + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs; + for (size_t i = 0; i < items.size(); i += 2) { + pairs.emplace_back(std::move(items[i]), std::move(items[i + 1])); + } + + Result.pop(); + return PushToCurrentCollection(MakeDict(pairs.data(), 
pairs.size())); + } + + bool OnOpenArray() override { + return OnCollectionOpen(); + } + + bool OnCloseArray() override { + Y_VERIFY_DEBUG(!Result.empty()); + auto& items = Result.top(); + TUnboxedValue list = MakeList(items.data(), items.size(), ValueBuilder); + Result.pop(); + return PushToCurrentCollection(std::move(list)); + } + + bool OnEnd() override { + return IsResultSingle(); + } + + TUnboxedValue GetResult() && { + Y_VERIFY_DEBUG(IsResultSingle()); + return std::move(Result.top()[0]); + } + +private: + bool OnCollectionOpen() { Result.emplace(); - return true; - } - - bool PushToCurrentCollection(TUnboxedValue&& value) { - Y_VERIFY_DEBUG(!Result.empty()); + return true; + } + + bool PushToCurrentCollection(TUnboxedValue&& value) { + Y_VERIFY_DEBUG(!Result.empty()); Result.top().emplace_back(std::move(value)); - return true; - } - - bool IsResultSingle() { - return Result.size() == 1 && Result.top().size() == 1; - } - - const IValueBuilder* ValueBuilder; - - using TUnboxedValues = TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>>; + return true; + } + + bool IsResultSingle() { + return Result.size() == 1 && Result.top().size() == 1; + } + + const IValueBuilder* ValueBuilder; + + using TUnboxedValues = TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>>; std::stack<TUnboxedValues, TSmallVec<TUnboxedValues, TStdAllocatorForUdf<TUnboxedValues>>> Result; -}; - +}; + class TTestCallbacks : public TJsonCallbacks { public: TTestCallbacks() @@ -289,17 +289,17 @@ void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer) { case ENodeType::Dict: return WriteMap<SkipMapEntity, EncodeUtf8>(value, writer); case ENodeType::Attr: - writer.OpenMap(); + writer.OpenMap(); writer.WriteKey("$attributes"); WriteMap<SkipMapEntity, EncodeUtf8>(value, writer); writer.WriteKey("$value"); WriteValue<SkipMapEntity, EncodeUtf8>(value.GetVariantItem().Release(), writer); - writer.CloseMap(); - } -} - -} - + writer.CloseMap(); + } +} + +} + bool 
IsValidJson(const TStringBuf json) { TMemoryInput input(json.data(), json.size()); TTestCallbacks callbacks; @@ -307,7 +307,7 @@ bool IsValidJson(const TStringBuf json) { } TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueBuilder, bool dencodeUtf8) { - TMemoryInput input(json.data(), json.size()); + TMemoryInput input(json.data(), json.size()); if (dencodeUtf8) { TDomCallbacks<true> callbacks(valueBuilder, /* throwException */ true); if (!ReadJson(&input, &callbacks)) { @@ -320,12 +320,12 @@ TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueB UdfTerminate("Internal error: parser error occurred but corresponding callback was not called"); } return std::move(callbacks).GetResult(); - } -} - + } +} + TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8) { - TStringStream output; - TJsonWriter writer(&output, /* formatOutput */ false); + TStringStream output; + TJsonWriter writer(&output, /* formatOutput */ false); if (skipMapEntity) if (encodeUtf8) WriteValue<true, true>(dom, writer); @@ -336,8 +336,8 @@ TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, b WriteValue<false, true>(dom, writer); else WriteValue<false, false>(dom, writer); - writer.Flush(); - return output.Str(); -} - + writer.Flush(); + return output.Str(); +} + } diff --git a/ydb/library/yql/minikql/dom/json.h b/ydb/library/yql/minikql/dom/json.h index b234fd8561..8152bb198c 100644 --- a/ydb/library/yql/minikql/dom/json.h +++ b/ydb/library/yql/minikql/dom/json.h @@ -1,14 +1,14 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_value_builder.h> - -namespace NYql::NDom { - + +namespace NYql::NDom { + bool IsValidJson(const TStringBuf json); NUdf::TUnboxedValue TryParseJsonDom(const TStringBuf json, const NUdf::IValueBuilder* valueBuilder, bool decodeUtf8 = false); - + TString SerializeJsonDom(const 
NUdf::TUnboxedValuePod dom, bool skipMapEntity = false, bool encodeUtf8 = false); - + } diff --git a/ydb/library/yql/minikql/dom/node.cpp b/ydb/library/yql/minikql/dom/node.cpp index 6eabde3e4e..0543b62e14 100644 --- a/ydb/library/yql/minikql/dom/node.cpp +++ b/ydb/library/yql/minikql/dom/node.cpp @@ -1,4 +1,4 @@ -#include "node.h" +#include "node.h" #include <util/generic/algorithm.h> diff --git a/ydb/library/yql/minikql/dom/node.h b/ydb/library/yql/minikql/dom/node.h index 9e9cc65840..47287b0a7e 100644 --- a/ydb/library/yql/minikql/dom/node.h +++ b/ydb/library/yql/minikql/dom/node.h @@ -1,31 +1,31 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/public/udf/udf_value_builder.h> #include <ydb/library/yql/public/udf/udf_value.h> - -namespace NYql::NDom { - + +namespace NYql::NDom { + using namespace NUdf; - + constexpr char NodeResourceName[] = "Yson2.Node"; -using TPair = std::pair<TUnboxedValue, TUnboxedValue>; - -enum class ENodeType : ui8 { - String = 0, - Bool = 1, - Int64 = 2, - Uint64 = 3, - Double = 4, - Entity = 5, - List = 6, - Dict = 7, +using TPair = std::pair<TUnboxedValue, TUnboxedValue>; + +enum class ENodeType : ui8 { + String = 0, + Bool = 1, + Int64 = 2, + Uint64 = 3, + Double = 4, + Entity = 5, + List = 6, + Dict = 7, Attr = 8, -}; - -constexpr ui8 NodeTypeShift = 4; -constexpr ui8 NodeTypeMask = 0xf0; - +}; + +constexpr ui8 NodeTypeShift = 4; +constexpr ui8 NodeTypeMask = 0xf0; + template<ENodeType type> constexpr inline TUnboxedValuePod SetNodeType(TUnboxedValuePod node) { const auto buffer = reinterpret_cast<ui8*>(&node); @@ -41,66 +41,66 @@ constexpr inline bool IsNodeType(const TUnboxedValuePod node) { return currentMask == expectedMask; } -inline ENodeType GetNodeType(const TUnboxedValuePod& node) { - const auto* buffer = reinterpret_cast<const char*>(&node); - const ui8 flag = (buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask) >> NodeTypeShift; - return static_cast<ENodeType>(flag); -} - -inline bool IsNodeType(const 
TUnboxedValuePod& node, ENodeType type) { - const auto* buffer = reinterpret_cast<const char*>(&node); - const ui8 currentMask = buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask; - const ui8 expectedMask = static_cast<ui8>(type) << NodeTypeShift; - return currentMask == expectedMask; -} - -class TMapNode : public TManagedBoxedValue { -public: - template <bool NoSwap> - class TIterator: public TManagedBoxedValue { - public: +inline ENodeType GetNodeType(const TUnboxedValuePod& node) { + const auto* buffer = reinterpret_cast<const char*>(&node); + const ui8 flag = (buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask) >> NodeTypeShift; + return static_cast<ENodeType>(flag); +} + +inline bool IsNodeType(const TUnboxedValuePod& node, ENodeType type) { + const auto* buffer = reinterpret_cast<const char*>(&node); + const ui8 currentMask = buffer[TUnboxedValuePod::InternalBufferSize] & NodeTypeMask; + const ui8 expectedMask = static_cast<ui8>(type) << NodeTypeShift; + return currentMask == expectedMask; +} + +class TMapNode : public TManagedBoxedValue { +public: + template <bool NoSwap> + class TIterator: public TManagedBoxedValue { + public: TIterator(const TMapNode* parent); - - private: + + private: bool Skip() final; bool Next(TUnboxedValue& key) final; bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final; - - const TRefCountedPtr<TMapNode> Parent; - ui32 Index; - }; - + + const TRefCountedPtr<TMapNode> Parent; + ui32 Index; + }; + TMapNode(const TPair* items, ui32 count); - + TMapNode(TMapNode&& src); ~TMapNode(); - + TUnboxedValue Lookup(const TStringRef& key) const; private: ui64 GetDictLength() const final; - + TUnboxedValue GetDictIterator() const final; - + TUnboxedValue GetKeysIterator() const final; - + TUnboxedValue GetPayloadsIterator() const final; - + bool Contains(const TUnboxedValuePod& key) const final; - + TUnboxedValue Lookup(const TUnboxedValuePod& key) const final; - + bool HasDictItems() const final; - + bool 
IsSortedDict() const final; - + void* GetResource() final; ui32 Count_; - ui32 UniqueCount_; + ui32 UniqueCount_; TPair * Items_; -}; - +}; + class TAttrNode : public TMapNode { public: TAttrNode(const TUnboxedValue& map, NUdf::TUnboxedValue&& value); @@ -123,43 +123,43 @@ inline TUnboxedValuePod MakeAttr(TUnboxedValue&& value, TPair* items, ui32 count inline TUnboxedValuePod MakeString(const TStringBuf value, const IValueBuilder* valueBuilder) { return valueBuilder->NewString(value).Release(); -} - +} + inline TUnboxedValuePod MakeBool(bool value) { return SetNodeType<ENodeType::Bool>(TUnboxedValuePod(value)); -} - +} + inline TUnboxedValuePod MakeInt64(i64 value) { return SetNodeType<ENodeType::Int64>(TUnboxedValuePod(value)); -} - +} + inline TUnboxedValuePod MakeUint64(ui64 value) { return SetNodeType<ENodeType::Uint64>(TUnboxedValuePod(value)); -} - +} + inline TUnboxedValuePod MakeDouble(double value) { return SetNodeType<ENodeType::Double>(TUnboxedValuePod(value)); -} - +} + inline TUnboxedValuePod MakeEntity() { return SetNodeType<ENodeType::Entity>(TUnboxedValuePod::Zero()); -} - +} + inline TUnboxedValuePod MakeList(TUnboxedValue* items, ui32 count, const IValueBuilder* valueBuilder) { return SetNodeType<ENodeType::List>(count > 0U ? valueBuilder->NewList(items, count).Release() : TUnboxedValuePod::Zero()); -} - +} + inline TUnboxedValuePod MakeDict(const TPair* items, ui32 count) { return SetNodeType<ENodeType::Dict>(count > 0U ? 
TUnboxedValuePod(new TMapNode(items, count)) : TUnboxedValuePod::Zero()); -} - +} + struct TDebugPrinter { TDebugPrinter(const TUnboxedValuePod& node); class IOutputStream& Out(class IOutputStream &o) const; const TUnboxedValuePod& Node; }; -} +} template<> inline void Out<NYql::NDom::TDebugPrinter>(class IOutputStream &o, const NYql::NDom::TDebugPrinter& p) { diff --git a/ydb/library/yql/minikql/dom/ya.make b/ydb/library/yql/minikql/dom/ya.make index d18bbe29e6..870843acd5 100644 --- a/ydb/library/yql/minikql/dom/ya.make +++ b/ydb/library/yql/minikql/dom/ya.make @@ -1,30 +1,30 @@ -LIBRARY() - +LIBRARY() + OWNER( g:kikimr g:yql g:yql_ydb_core ) - + YQL_ABI_VERSION(2 21 0) -PEERDIR( +PEERDIR( library/cpp/containers/stack_vector library/cpp/json library/cpp/yson_pull ydb/library/yql/public/udf ydb/library/yql/utils -) - -SRCS( - node.cpp - json.cpp - yson.cpp +) + +SRCS( + node.cpp + json.cpp + yson.cpp make.cpp peel.cpp hash.cpp -) - +) + END() RECURSE_FOR_TESTS( diff --git a/ydb/library/yql/minikql/dom/yson.cpp b/ydb/library/yql/minikql/dom/yson.cpp index f3ab30f22c..d51802bca3 100644 --- a/ydb/library/yql/minikql/dom/yson.cpp +++ b/ydb/library/yql/minikql/dom/yson.cpp @@ -1,152 +1,152 @@ #include "node.h" -#include "yson.h" - +#include "yson.h" + #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/yson_pull/exceptions.h> #include <library/cpp/yson_pull/reader.h> #include <library/cpp/yson_pull/writer.h> - + #include <util/string/builder.h> -namespace NYql::NDom { - -using namespace NUdf; -using namespace NYsonPull; - -namespace { - +namespace NYql::NDom { + +using namespace NUdf; +using namespace NYsonPull; + +namespace { + [[noreturn]] Y_NO_INLINE void UnexpectedEvent(EEventType ev) { UdfTerminate((::TStringBuilder() << "Unexpected event: " << ev).c_str()); -} - +} + TUnboxedValuePod ParseScalar(const TScalar& scalar, const IValueBuilder* valueBuilder) { - switch (scalar.Type()) { - case EScalarType::Entity: + switch (scalar.Type()) { 
+ case EScalarType::Entity: return MakeEntity(); - - case EScalarType::Boolean: + + case EScalarType::Boolean: return MakeBool(scalar.AsBoolean()); - - case EScalarType::Int64: + + case EScalarType::Int64: return MakeInt64(scalar.AsInt64()); - - case EScalarType::UInt64: + + case EScalarType::UInt64: return MakeUint64(scalar.AsUInt64()); - - case EScalarType::Float64: + + case EScalarType::Float64: return MakeDouble(scalar.AsFloat64()); - - case EScalarType::String: + + case EScalarType::String: return MakeString(scalar.AsString(), valueBuilder); - } -} - -TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder); -TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder); - -TUnboxedValue ParseList(TReader& reader, const IValueBuilder* valueBuilder) { - TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>> items; - for (;;) { - const auto& ev = reader.NextEvent(); - switch (ev.Type()) { - case EEventType::BeginList: - items.emplace_back(ParseList(reader, valueBuilder)); - break; - case EEventType::EndList: - return MakeList(items.data(), items.size(), valueBuilder); - case EEventType::BeginMap: - items.emplace_back(ParseDict(reader, valueBuilder)); - break; - case EEventType::BeginAttributes: - items.emplace_back(ParseAttributes(reader, valueBuilder)); - break; - case EEventType::Scalar: - items.emplace_back(ParseScalar(ev.AsScalar(), valueBuilder)); - break; + } +} + +TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder); +TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder); + +TUnboxedValue ParseList(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>> items; + for (;;) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + items.emplace_back(ParseList(reader, valueBuilder)); + break; + case EEventType::EndList: + return MakeList(items.data(), items.size(), 
valueBuilder); + case EEventType::BeginMap: + items.emplace_back(ParseDict(reader, valueBuilder)); + break; + case EEventType::BeginAttributes: + items.emplace_back(ParseAttributes(reader, valueBuilder)); + break; + case EEventType::Scalar: + items.emplace_back(ParseScalar(ev.AsScalar(), valueBuilder)); + break; default: UnexpectedEvent(ev.Type()); - } - } -} - -TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder) { - TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; - for (;;) { - const auto& evKey = reader.NextEvent(); - if (evKey.Type() == EEventType::EndMap) { - return MakeDict(items.data(), items.size()); - } - - Y_ASSERT(evKey.Type() == EEventType::Key); - auto key = valueBuilder->NewString(evKey.AsString()); - const auto& ev = reader.NextEvent(); - switch (ev.Type()) { - case EEventType::BeginList: - items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); - break; - case EEventType::BeginMap: - items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); - break; - case EEventType::BeginAttributes: - items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); - break; - case EEventType::Scalar: - items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); - break; + } + } +} + +TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndMap) { + return MakeDict(items.data(), items.size()); + } + + Y_ASSERT(evKey.Type() == EEventType::Key); + auto key = valueBuilder->NewString(evKey.AsString()); + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); + break; + case EEventType::BeginMap: + 
items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); + break; + case EEventType::BeginAttributes: + items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); + break; + case EEventType::Scalar: + items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); + break; default: UnexpectedEvent(ev.Type()); - } - } -} - -TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder); - -TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder) { - TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; - for (;;) { - const auto& evKey = reader.NextEvent(); - if (evKey.Type() == EEventType::EndAttributes) { - break; - } - - Y_ASSERT(evKey.Type() == EEventType::Key); - auto key = valueBuilder->NewString(evKey.AsString()); - const auto& ev = reader.NextEvent(); - switch (ev.Type()) { - case EEventType::BeginList: - items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); - break; - case EEventType::BeginMap: - items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); - break; - case EEventType::BeginAttributes: - items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); - break; - case EEventType::Scalar: - items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); - break; + } + } +} + +TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder); + +TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder) { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + for (;;) { + const auto& evKey = reader.NextEvent(); + if (evKey.Type() == EEventType::EndAttributes) { + break; + } + + Y_ASSERT(evKey.Type() == EEventType::Key); + auto key = valueBuilder->NewString(evKey.AsString()); + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + 
items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder))); + break; + case EEventType::BeginMap: + items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder))); + break; + case EEventType::BeginAttributes: + items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder))); + break; + case EEventType::Scalar: + items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder))); + break; default: UnexpectedEvent(ev.Type()); - } - } - - return MakeAttr(ParseValue(reader, valueBuilder), items.data(), items.size()); -} - -TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder) { - const auto& ev = reader.NextEvent(); - switch (ev.Type()) { - case EEventType::BeginList: - return ParseList(reader, valueBuilder); - case EEventType::BeginMap: - return ParseDict(reader, valueBuilder); - case EEventType::BeginAttributes: - return ParseAttributes(reader, valueBuilder); + } + } + + return MakeAttr(ParseValue(reader, valueBuilder), items.data(), items.size()); +} + +TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder) { + const auto& ev = reader.NextEvent(); + switch (ev.Type()) { + case EEventType::BeginList: + return ParseList(reader, valueBuilder); + case EEventType::BeginMap: + return ParseDict(reader, valueBuilder); + case EEventType::BeginAttributes: + return ParseAttributes(reader, valueBuilder); case EEventType::Scalar: return ParseScalar(ev.AsScalar(), valueBuilder); default: - UnexpectedEvent(ev.Type()); + UnexpectedEvent(ev.Type()); } } @@ -174,7 +174,7 @@ bool CheckAttributes(TReader& reader) { for (;;) { const auto& evKey = reader.NextEvent(); if (evKey.Type() == EEventType::EndAttributes) - break; + break; if (evKey.Type() != EEventType::Key) return false; @@ -236,14 +236,14 @@ bool CheckValue(TReader& reader) { else return false; case EEventType::Scalar: - break; + break; default: return false; - } + } return true; -} - 
-void WriteValue(TWriter& writer, const TUnboxedValue& x) { +} + +void WriteValue(TWriter& writer, const TUnboxedValue& x) { switch (GetNodeType(x)) { case ENodeType::String: writer.String(x.AsStringRef()); @@ -264,7 +264,7 @@ void WriteValue(TWriter& writer, const TUnboxedValue& x) { writer.Entity(); break; case ENodeType::List: - writer.BeginList(); + writer.BeginList(); if (x.IsBoxed()) { if (const auto elements = x.GetElements()) { const auto size = x.GetListLength(); @@ -276,53 +276,53 @@ void WriteValue(TWriter& writer, const TUnboxedValue& x) { for (TUnboxedValue v; it.Next(v); WriteValue(writer, v)) continue; } - } - writer.EndList(); - break; + } + writer.EndList(); + break; case ENodeType::Dict: - writer.BeginMap(); + writer.BeginMap(); if (x.IsBoxed()) { TUnboxedValue key, payload; for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) { writer.Key(key.AsStringRef()); WriteValue(writer, payload); } - } - writer.EndMap(); - break; + } + writer.EndMap(); + break; case ENodeType::Attr: { - writer.BeginAttributes(); - TUnboxedValue key, payload; + writer.BeginAttributes(); + TUnboxedValue key, payload; for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) { - writer.Key(key.AsStringRef()); - WriteValue(writer, payload); - } - - writer.EndAttributes(); - WriteValue(writer, x.GetVariantItem()); + writer.Key(key.AsStringRef()); + WriteValue(writer, payload); + } + + writer.EndAttributes(); + WriteValue(writer, x.GetVariantItem()); } break; - } -} - -void SerializeYsonDomImpl(const NUdf::TUnboxedValue& dom, TWriter& writer) { - writer.BeginStream(); - WriteValue(writer, dom); - writer.EndStream(); -} - -} - -NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder) { - auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); - const auto& begin = reader.NextEvent(); - Y_ASSERT(begin.Type() == EEventType::BeginStream); - auto value = ParseValue(reader, valueBuilder); - const auto& 
end = reader.NextEvent(); - Y_ASSERT(end.Type() == EEventType::EndStream); - return value; -} - + } +} + +void SerializeYsonDomImpl(const NUdf::TUnboxedValue& dom, TWriter& writer) { + writer.BeginStream(); + WriteValue(writer, dom); + writer.EndStream(); +} + +} + +NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder) { + auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); + const auto& begin = reader.NextEvent(); + Y_ASSERT(begin.Type() == EEventType::BeginStream); + auto value = ParseValue(reader, valueBuilder); + const auto& end = reader.NextEvent(); + Y_ASSERT(end.Type() == EEventType::EndStream); + return value; +} + bool IsValidYson(const TStringBuf yson) try { auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node); const auto& begin = reader.NextEvent(); @@ -336,25 +336,25 @@ bool IsValidYson(const TStringBuf yson) try { return false; } -TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom) { - TString result; - TWriter writer = MakeBinaryWriter(NOutput::FromString(&result), EStreamType::Node); - SerializeYsonDomImpl(dom, writer); - return result; -} - -TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom) { - TString result; - TWriter writer = MakeTextWriter(NOutput::FromString(&result), EStreamType::Node); - SerializeYsonDomImpl(dom, writer); - return result; -} - -TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom) { - TString result; - TWriter writer = MakePrettyTextWriter(NOutput::FromString(&result), EStreamType::Node); - SerializeYsonDomImpl(dom, writer); - return result; -} - -} +TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = MakeBinaryWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = 
MakeTextWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom) { + TString result; + TWriter writer = MakePrettyTextWriter(NOutput::FromString(&result), EStreamType::Node); + SerializeYsonDomImpl(dom, writer); + return result; +} + +} diff --git a/ydb/library/yql/minikql/dom/yson.h b/ydb/library/yql/minikql/dom/yson.h index aba5077699..a8d5f8874e 100644 --- a/ydb/library/yql/minikql/dom/yson.h +++ b/ydb/library/yql/minikql/dom/yson.h @@ -1,18 +1,18 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_value_builder.h> - -namespace NYql::NDom { - + +namespace NYql::NDom { + bool IsValidYson(const TStringBuf yson); -NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder); - -TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom); - -TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom); - -TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom); - -} +NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder); + +TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom); + +TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom); + +TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom); + +} diff --git a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_compare.h b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_compare.h index 8d865b86d9..fbe5b3173c 100644 --- a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_compare.h +++ b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_compare.h @@ -454,7 +454,7 @@ void RegisterCompareStrings(IBuiltinFunctionRegistry& registry, const std::strin RegisterCompareCustomOpt<NUdf::TDataType<char*>, NUdf::TDataType<NUdf::TUtf8>, TFunc<NUdf::EDataSlot::Utf8>, 
TArgs>(registry, name); if constexpr (WithSpecial) { RegisterCompareCustomOpt<NUdf::TDataType<NUdf::TUuid>, NUdf::TDataType<NUdf::TUuid>, TFunc<NUdf::EDataSlot::Uuid>, TArgs>(registry, name); - RegisterCompareCustomOpt<NUdf::TDataType<NUdf::TDyNumber>, NUdf::TDataType<NUdf::TDyNumber>, TFunc<NUdf::EDataSlot::DyNumber>, TArgs>(registry, name); + RegisterCompareCustomOpt<NUdf::TDataType<NUdf::TDyNumber>, NUdf::TDataType<NUdf::TDyNumber>, TFunc<NUdf::EDataSlot::DyNumber>, TArgs>(registry, name); } } @@ -466,7 +466,7 @@ void RegisterAggrCompareStrings(IBuiltinFunctionRegistry& registry, const std::s RegisterAggrCompareCustomOpt<NUdf::TDataType<char*>, TFunc<NUdf::EDataSlot::String>, TArgs>(registry, name); RegisterAggrCompareCustomOpt<NUdf::TDataType<NUdf::TUtf8>, TFunc<NUdf::EDataSlot::Utf8>, TArgs>(registry, name); RegisterAggrCompareCustomOpt<NUdf::TDataType<NUdf::TUuid>, TFunc<NUdf::EDataSlot::Uuid>, TArgs>(registry, name); - RegisterAggrCompareCustomOpt<NUdf::TDataType<NUdf::TDyNumber>, TFunc<NUdf::EDataSlot::DyNumber>, TArgs>(registry, name); + RegisterAggrCompareCustomOpt<NUdf::TDataType<NUdf::TDyNumber>, TFunc<NUdf::EDataSlot::DyNumber>, TArgs>(registry, name); } void RegisterEquals(IBuiltinFunctionRegistry& registry); diff --git a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp index 43f7287be2..e7ec46309a 100644 --- a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp +++ b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp @@ -7,7 +7,7 @@ #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> - + namespace NKikimr { namespace NMiniKQL { @@ -379,58 +379,58 @@ struct TStringConvert { #endif }; -NUdf::TUnboxedValuePod JsonToJsonDocument(const NUdf::TUnboxedValuePod value) { - auto binaryJson = NKikimr::NBinaryJson::SerializeToBinaryJson(value.AsStringRef()); - if (!binaryJson.Defined()) { - // JSON parse error 
happened, return NULL - return NUdf::TUnboxedValuePod(); - } - return MakeString(TStringBuf(binaryJson->Data(), binaryJson->Size())); -} - -struct TJsonToJsonDocumentConvert { - static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& arg) - { - return JsonToJsonDocument(arg); - } - -#ifndef MKQL_DISABLE_CODEGEN - static Value* Generate(Value* json, const TCodegenContext& ctx, BasicBlock*& block) - { - auto& context = ctx.Codegen->GetContext(); - const auto functionAddress = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(JsonToJsonDocument)); - const auto functionType = FunctionType::get(json->getType(), {json->getType()}, /* isVarArg */ false); - const auto functionPtr = CastInst::Create(Instruction::IntToPtr, functionAddress, PointerType::getUnqual(functionType), "func", block); - return CallInst::Create(functionPtr, {json}, "jsonToJsonDocument", block); - } -#endif -}; - -NUdf::TUnboxedValuePod JsonDocumentToJson(const NUdf::TUnboxedValuePod value) { - auto json = NKikimr::NBinaryJson::SerializeToJson(value.AsStringRef()); - return MakeString(json); -} - -struct TJsonDocumentToJsonConvert { - static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& arg) - { - return JsonDocumentToJson(arg); - } - -#ifndef MKQL_DISABLE_CODEGEN - static Value* Generate(Value* jsonDocument, const TCodegenContext& ctx, BasicBlock*& block) - { - auto& context = ctx.Codegen->GetContext(); - const auto functionAddress = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(JsonDocumentToJson)); - const auto functionType = FunctionType::get(jsonDocument->getType(), {jsonDocument->getType()}, /* isVarArg */ false); - const auto functionPtr = CastInst::Create(Instruction::IntToPtr, functionAddress, PointerType::getUnqual(functionType), "func", block); - return CallInst::Create(functionPtr, {jsonDocument}, "jsonDocumentToJson", block); - } -#endif -}; - +NUdf::TUnboxedValuePod JsonToJsonDocument(const NUdf::TUnboxedValuePod value) { + auto binaryJson = 
NKikimr::NBinaryJson::SerializeToBinaryJson(value.AsStringRef()); + if (!binaryJson.Defined()) { + // JSON parse error happened, return NULL + return NUdf::TUnboxedValuePod(); + } + return MakeString(TStringBuf(binaryJson->Data(), binaryJson->Size())); } +struct TJsonToJsonDocumentConvert { + static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& arg) + { + return JsonToJsonDocument(arg); + } + +#ifndef MKQL_DISABLE_CODEGEN + static Value* Generate(Value* json, const TCodegenContext& ctx, BasicBlock*& block) + { + auto& context = ctx.Codegen->GetContext(); + const auto functionAddress = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(JsonToJsonDocument)); + const auto functionType = FunctionType::get(json->getType(), {json->getType()}, /* isVarArg */ false); + const auto functionPtr = CastInst::Create(Instruction::IntToPtr, functionAddress, PointerType::getUnqual(functionType), "func", block); + return CallInst::Create(functionPtr, {json}, "jsonToJsonDocument", block); + } +#endif +}; + +NUdf::TUnboxedValuePod JsonDocumentToJson(const NUdf::TUnboxedValuePod value) { + auto json = NKikimr::NBinaryJson::SerializeToJson(value.AsStringRef()); + return MakeString(json); +} + +struct TJsonDocumentToJsonConvert { + static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& arg) + { + return JsonDocumentToJson(arg); + } + +#ifndef MKQL_DISABLE_CODEGEN + static Value* Generate(Value* jsonDocument, const TCodegenContext& ctx, BasicBlock*& block) + { + auto& context = ctx.Codegen->GetContext(); + const auto functionAddress = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(JsonDocumentToJson)); + const auto functionType = FunctionType::get(jsonDocument->getType(), {jsonDocument->getType()}, /* isVarArg */ false); + const auto functionPtr = CastInst::Create(Instruction::IntToPtr, functionAddress, PointerType::getUnqual(functionType), "func", block); + return CallInst::Create(functionPtr, {jsonDocument}, "jsonDocumentToJson", block); + } 
+#endif +}; + +} + namespace NDecimal { template <typename TInput> @@ -1062,21 +1062,21 @@ void RegisterTzDateimeConvert(IBuiltinFunctionRegistry& registry) { RegisterTzDateimeOpt<NUdf::TDataType<NUdf::TTzTimestamp>, NUdf::TDataType<NUdf::TTimestamp>, true>(registry); } -void RegisterJsonDocumentConvert(IBuiltinFunctionRegistry& registry) { - // String/Utf8 -> JsonDocument and JsonDocument -> String/Utf8 conversions. TStringConvert is used as a placeholder because - // actual conversions are handled by ValueFromString and ValueToString in mkql_type_ops.cpp - RegisterFunctionOpt<NUdf::TDataType<char*>, NUdf::TDataType<NUdf::TJsonDocument>, TStringConvert, TUnaryArgsOpt>(registry, convert); - RegisterFunctionOpt<NUdf::TDataType<NUdf::TUtf8>, NUdf::TDataType<NUdf::TJsonDocument>, TStringConvert, TUnaryArgsOpt>(registry, convert); - RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<char*>, TStringConvert, TUnaryArgsOpt>(registry, convert); - RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<NUdf::TUtf8>, TStringConvert, TUnaryArgsOpt>(registry, convert); - - // Json -> JsonDocument and JsonDocument -> Json conversions - RegisterFunctionOpt<NUdf::TDataType<NUdf::TJson>, NUdf::TDataType<NUdf::TJsonDocument>, TJsonToJsonDocumentConvert, TUnaryArgsOpt>(registry, convert); - RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<NUdf::TJson>, TJsonDocumentToJsonConvert, TUnaryArgsOpt>(registry, convert); -} - +void RegisterJsonDocumentConvert(IBuiltinFunctionRegistry& registry) { + // String/Utf8 -> JsonDocument and JsonDocument -> String/Utf8 conversions. 
TStringConvert is used as a placeholder because + // actual conversions are handled by ValueFromString and ValueToString in mkql_type_ops.cpp + RegisterFunctionOpt<NUdf::TDataType<char*>, NUdf::TDataType<NUdf::TJsonDocument>, TStringConvert, TUnaryArgsOpt>(registry, convert); + RegisterFunctionOpt<NUdf::TDataType<NUdf::TUtf8>, NUdf::TDataType<NUdf::TJsonDocument>, TStringConvert, TUnaryArgsOpt>(registry, convert); + RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<char*>, TStringConvert, TUnaryArgsOpt>(registry, convert); + RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<NUdf::TUtf8>, TStringConvert, TUnaryArgsOpt>(registry, convert); + + // Json -> JsonDocument and JsonDocument -> Json conversions + RegisterFunctionOpt<NUdf::TDataType<NUdf::TJson>, NUdf::TDataType<NUdf::TJsonDocument>, TJsonToJsonDocumentConvert, TUnaryArgsOpt>(registry, convert); + RegisterFunctionOpt<NUdf::TDataType<NUdf::TJsonDocument>, NUdf::TDataType<NUdf::TJson>, TJsonDocumentToJsonConvert, TUnaryArgsOpt>(registry, convert); } +} + void RegisterConvert(IBuiltinFunctionRegistry& registry) { RegisterIntegralCasts<NUdf::TDataType<i32>>(registry); RegisterIntegralCasts<NUdf::TDataType<ui32>>(registry); @@ -1127,8 +1127,8 @@ void RegisterConvert(IBuiltinFunctionRegistry& registry) { RegisterToDateConvert(registry); RegisterDecimalConvert(registry); - - RegisterJsonDocumentConvert(registry); + + RegisterJsonDocumentConvert(registry); } } // namespace NMiniKQL diff --git a/ydb/library/yql/minikql/jsonpath/JsonPath.g b/ydb/library/yql/minikql/jsonpath/JsonPath.g index f32f98d185..59a0fb0cf8 100644 --- a/ydb/library/yql/minikql/jsonpath/JsonPath.g +++ b/ydb/library/yql/minikql/jsonpath/JsonPath.g @@ -1,203 +1,203 @@ -grammar JsonPath; - -options { - language = Cpp; - memoize = true; -} - -// Root rule. Input is a mode followed by jsonpath expression -jsonpath: (STRICT | LAX)? 
expr EOF; - -// Generic jsonpath expression -expr: or_expr; - -// Arithmetic and boolean operations -// Operator precedence: -// 1. Unary plus, minus and logical not -// 2. Multiplication, division, modulus -// 3. Addition, substraction -// 4. Compare operators (<, <=, >, >=) -// 5. Equality operators (==, !=, <>) -// 6. Logical and -// 7. Logical or -// NOTE: We execute JsonPath using bottom up approach. Thus -// operations with higher precedence must be located "deeper" inside AST -or_expr: and_expr (OR and_expr)*; -and_expr: equal_expr (AND equal_expr)*; -equal_expr: compare_expr ((EQUAL | NOT_EQUAL | NOT_EQUAL_SQL) compare_expr)?; -compare_expr: add_expr ((LESS | LESS_EQUAL | GREATER | GREATER_EQUAL) add_expr)?; -add_expr: mul_expr ((PLUS | MINUS) mul_expr)*; -mul_expr: unary_expr ((ASTERISK | SLASH | PERCENT) unary_expr)*; -unary_expr: (PLUS | MINUS | NOT)? predicate_expr; - -// Predicates, `"string" starts with "str"` -// NOTE: `is unknown` predicate is defined separately in primary rule. This is done -// because if we add it as an alternative to predicate_expr, ANTLR would need backtacking. -// For example it would not be possible to tell if expression like `( ... ) is unknown` is -// related to `starts with` (and braces are part of plain_expr rule) or it is related to -// `is unknown` rule (and braces are not included in plain_expr). -predicate_expr: - (plain_expr (starts_with_expr | like_regex_expr)?) 
- | (EXISTS LBRACE expr RBRACE); - -starts_with_expr: STARTS WITH plain_expr; -like_regex_expr: LIKE_REGEX STRING_VALUE (FLAG STRING_VALUE)?; - -// Plain expression serves as an argument to binary and unary operators -plain_expr: accessor_expr; - -accessor_expr: primary accessor_op*; -accessor_op: member_accessor | wildcard_member_accessor | array_accessor | wildcard_array_accessor | filter | method; - -// Member acceccors, `$.key` and `$.*` -member_accessor: DOT (identifier | STRING_VALUE); -wildcard_member_accessor: DOT ASTERISK; - -// Array accessors, `$[0, 1 to 3, last]` and `$[*]` -array_subscript: expr (TO expr)?; -array_accessor: LBRACE_SQUARE array_subscript (COMMA array_subscript)* RBRACE_SQUARE; -wildcard_array_accessor: LBRACE_SQUARE ASTERISK RBRACE_SQUARE; - -// Filters, `$ ? (@.age >= 18)` -filter: QUESTION LBRACE expr RBRACE; - -// Methods, `$.abs().ceiling()` -method: DOT (ABS_METHOD | FLOOR_METHOD | CEILING_METHOD | DOUBLE_METHOD | TYPE_METHOD | SIZE_METHOD | KEYVALUE_METHOD) LBRACE RBRACE; - -// Primaries are objects to perform operations on: -// 1. All literals: -// - Numbers, `1.23e-5` -// - Bool, `false` and `true` -// - Null, `null` -// - Strings, `"привет"`, `\r\n\t` -// 2. Current object, `$` -// 3. Current filtering object, `@` -// 4. Variables, `$my_cool_variable` -// 5. Last array index, `last` -// 6. Parenthesized jsonpath expression, `($.key + $[0])` -primary: - NUMBER - | DOLLAR - | LAST - | (LBRACE expr RBRACE (IS UNKNOWN)?) - | VARIABLE - | TRUE - | FALSE - | NULL - | STRING_VALUE - | AT; - -// Identifier for member accessors and variable names, `$.key` and `$variable_name` -// JsonPath supports using keywords as identifiers. We need to mention keywords in -// identifer rule because otherwise ANTLR will treat them as a separate token. 
-// For instance input `$.to` without this modification will be treated as -// `DOLLAR DOT TO`, not `DOLLAR DOT IDENTIFIER` -identifier: IDENTIFIER | keyword; - -keyword: - ABS_METHOD - | CEILING_METHOD - | DOUBLE_METHOD - | EXISTS - | FALSE - | FLAG - | FLOOR_METHOD - | IS - | KEYVALUE_METHOD - | LAST - | LAX - | LIKE_REGEX - | NULL - | SIZE_METHOD - | STARTS - | STRICT - | TO - | TRUE - | TYPE_METHOD - | UNKNOWN - | WITH; - -// -// Lexer -// - -AND: '&&'; -ASTERISK: '*'; -AT: '@'; -BACKSLASH: '\\'; -COMMA: ','; -DOLLAR: '$'; -DOT: '.'; -EQUAL: '=='; -GREATER_EQUAL: '>='; -GREATER: '>'; -LBRACE_SQUARE: '['; -LBRACE: '('; -LESS_EQUAL: '<='; -LESS: '<'; -MINUS: '-'; -NOT_EQUAL_SQL: '<>'; -NOT_EQUAL: '!='; -NOT: '!'; -OR: '||'; -PERCENT: '%'; -PLUS: '+'; -QUESTION: '?'; -QUOTE_DOUBLE: '"'; -QUOTE_SINGLE: '\''; -RBRACE_SQUARE: ']'; -RBRACE: ')'; -SLASH: '/'; -UNDERSCORE: '_'; - -// Keywords -ABS_METHOD: 'abs'; -CEILING_METHOD: 'ceiling'; -DOUBLE_METHOD: 'double'; -EXISTS: 'exists'; -FALSE: 'false'; -FLAG: 'flag'; -FLOOR_METHOD: 'floor'; -IS: 'is'; -KEYVALUE_METHOD: 'keyvalue'; -LAST: 'last'; -LAX: 'lax'; -LIKE_REGEX: 'like_regex'; -NULL: 'null'; -SIZE_METHOD: 'size'; -STARTS: 'starts'; -STRICT: 'strict'; -TO: 'to'; -TRUE: 'true'; -TYPE_METHOD: 'type'; -UNKNOWN: 'unknown'; -WITH: 'with'; - -// String literal +grammar JsonPath; + +options { + language = Cpp; + memoize = true; +} + +// Root rule. Input is a mode followed by jsonpath expression +jsonpath: (STRICT | LAX)? expr EOF; + +// Generic jsonpath expression +expr: or_expr; + +// Arithmetic and boolean operations +// Operator precedence: +// 1. Unary plus, minus and logical not +// 2. Multiplication, division, modulus +// 3. Addition, substraction +// 4. Compare operators (<, <=, >, >=) +// 5. Equality operators (==, !=, <>) +// 6. Logical and +// 7. Logical or +// NOTE: We execute JsonPath using bottom up approach. 
Thus +// operations with higher precedence must be located "deeper" inside AST +or_expr: and_expr (OR and_expr)*; +and_expr: equal_expr (AND equal_expr)*; +equal_expr: compare_expr ((EQUAL | NOT_EQUAL | NOT_EQUAL_SQL) compare_expr)?; +compare_expr: add_expr ((LESS | LESS_EQUAL | GREATER | GREATER_EQUAL) add_expr)?; +add_expr: mul_expr ((PLUS | MINUS) mul_expr)*; +mul_expr: unary_expr ((ASTERISK | SLASH | PERCENT) unary_expr)*; +unary_expr: (PLUS | MINUS | NOT)? predicate_expr; + +// Predicates, `"string" starts with "str"` +// NOTE: `is unknown` predicate is defined separately in primary rule. This is done +// because if we add it as an alternative to predicate_expr, ANTLR would need backtacking. +// For example it would not be possible to tell if expression like `( ... ) is unknown` is +// related to `starts with` (and braces are part of plain_expr rule) or it is related to +// `is unknown` rule (and braces are not included in plain_expr). +predicate_expr: + (plain_expr (starts_with_expr | like_regex_expr)?) + | (EXISTS LBRACE expr RBRACE); + +starts_with_expr: STARTS WITH plain_expr; +like_regex_expr: LIKE_REGEX STRING_VALUE (FLAG STRING_VALUE)?; + +// Plain expression serves as an argument to binary and unary operators +plain_expr: accessor_expr; + +accessor_expr: primary accessor_op*; +accessor_op: member_accessor | wildcard_member_accessor | array_accessor | wildcard_array_accessor | filter | method; + +// Member acceccors, `$.key` and `$.*` +member_accessor: DOT (identifier | STRING_VALUE); +wildcard_member_accessor: DOT ASTERISK; + +// Array accessors, `$[0, 1 to 3, last]` and `$[*]` +array_subscript: expr (TO expr)?; +array_accessor: LBRACE_SQUARE array_subscript (COMMA array_subscript)* RBRACE_SQUARE; +wildcard_array_accessor: LBRACE_SQUARE ASTERISK RBRACE_SQUARE; + +// Filters, `$ ? 
(@.age >= 18)` +filter: QUESTION LBRACE expr RBRACE; + +// Methods, `$.abs().ceiling()` +method: DOT (ABS_METHOD | FLOOR_METHOD | CEILING_METHOD | DOUBLE_METHOD | TYPE_METHOD | SIZE_METHOD | KEYVALUE_METHOD) LBRACE RBRACE; + +// Primaries are objects to perform operations on: +// 1. All literals: +// - Numbers, `1.23e-5` +// - Bool, `false` and `true` +// - Null, `null` +// - Strings, `"привет"`, `\r\n\t` +// 2. Current object, `$` +// 3. Current filtering object, `@` +// 4. Variables, `$my_cool_variable` +// 5. Last array index, `last` +// 6. Parenthesized jsonpath expression, `($.key + $[0])` +primary: + NUMBER + | DOLLAR + | LAST + | (LBRACE expr RBRACE (IS UNKNOWN)?) + | VARIABLE + | TRUE + | FALSE + | NULL + | STRING_VALUE + | AT; + +// Identifier for member accessors and variable names, `$.key` and `$variable_name` +// JsonPath supports using keywords as identifiers. We need to mention keywords in +// identifer rule because otherwise ANTLR will treat them as a separate token. +// For instance input `$.to` without this modification will be treated as +// `DOLLAR DOT TO`, not `DOLLAR DOT IDENTIFIER` +identifier: IDENTIFIER | keyword; + +keyword: + ABS_METHOD + | CEILING_METHOD + | DOUBLE_METHOD + | EXISTS + | FALSE + | FLAG + | FLOOR_METHOD + | IS + | KEYVALUE_METHOD + | LAST + | LAX + | LIKE_REGEX + | NULL + | SIZE_METHOD + | STARTS + | STRICT + | TO + | TRUE + | TYPE_METHOD + | UNKNOWN + | WITH; + +// +// Lexer +// + +AND: '&&'; +ASTERISK: '*'; +AT: '@'; +BACKSLASH: '\\'; +COMMA: ','; +DOLLAR: '$'; +DOT: '.'; +EQUAL: '=='; +GREATER_EQUAL: '>='; +GREATER: '>'; +LBRACE_SQUARE: '['; +LBRACE: '('; +LESS_EQUAL: '<='; +LESS: '<'; +MINUS: '-'; +NOT_EQUAL_SQL: '<>'; +NOT_EQUAL: '!='; +NOT: '!'; +OR: '||'; +PERCENT: '%'; +PLUS: '+'; +QUESTION: '?'; +QUOTE_DOUBLE: '"'; +QUOTE_SINGLE: '\''; +RBRACE_SQUARE: ']'; +RBRACE: ')'; +SLASH: '/'; +UNDERSCORE: '_'; + +// Keywords +ABS_METHOD: 'abs'; +CEILING_METHOD: 'ceiling'; +DOUBLE_METHOD: 'double'; +EXISTS: 'exists'; +FALSE: 
'false'; +FLAG: 'flag'; +FLOOR_METHOD: 'floor'; +IS: 'is'; +KEYVALUE_METHOD: 'keyvalue'; +LAST: 'last'; +LAX: 'lax'; +LIKE_REGEX: 'like_regex'; +NULL: 'null'; +SIZE_METHOD: 'size'; +STARTS: 'starts'; +STRICT: 'strict'; +TO: 'to'; +TRUE: 'true'; +TYPE_METHOD: 'type'; +UNKNOWN: 'unknown'; +WITH: 'with'; + +// String literal fragment STRING_CORE_SINGLE: ( ~(QUOTE_SINGLE | BACKSLASH) | (BACKSLASH .) )*; fragment STRING_CORE_DOUBLE: ( ~(QUOTE_DOUBLE | BACKSLASH) | (BACKSLASH .) )*; -fragment STRING_SINGLE: (QUOTE_SINGLE STRING_CORE_SINGLE QUOTE_SINGLE); -fragment STRING_DOUBLE: (QUOTE_DOUBLE STRING_CORE_DOUBLE QUOTE_DOUBLE); - -STRING_VALUE: (STRING_SINGLE | STRING_DOUBLE); - -// Number literal -fragment DIGIT: '0'..'9'; -fragment DIGITS: DIGIT+; -fragment REAL_PART: DOT DIGITS; -fragment EXP_PART: ('e' | 'E') (PLUS | MINUS)? DIGITS; - -NUMBER: DIGITS REAL_PART? EXP_PART?; - -// Javascript identifier -fragment ID_START: ('a'..'z' | 'A'..'Z' | UNDERSCORE); -fragment ID_CORE: (ID_START | DIGIT | DOLLAR); - -IDENTIFIER: ID_START (ID_CORE)*; - -// Jsonpath variable -VARIABLE: DOLLAR (ID_CORE)*; - +fragment STRING_SINGLE: (QUOTE_SINGLE STRING_CORE_SINGLE QUOTE_SINGLE); +fragment STRING_DOUBLE: (QUOTE_DOUBLE STRING_CORE_DOUBLE QUOTE_DOUBLE); + +STRING_VALUE: (STRING_SINGLE | STRING_DOUBLE); + +// Number literal +fragment DIGIT: '0'..'9'; +fragment DIGITS: DIGIT+; +fragment REAL_PART: DOT DIGITS; +fragment EXP_PART: ('e' | 'E') (PLUS | MINUS)? DIGITS; + +NUMBER: DIGITS REAL_PART? EXP_PART?; + +// Javascript identifier +fragment ID_START: ('a'..'z' | 'A'..'Z' | UNDERSCORE); +fragment ID_CORE: (ID_START | DIGIT | DOLLAR); + +IDENTIFIER: ID_START (ID_CORE)*; + +// Jsonpath variable +VARIABLE: DOLLAR (ID_CORE)*; + WS: (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}; // FIXME: WS and COMMENT tokens are currently required. 
// FIXME: Since there are no comments in JSONPATH, we split whitespace characters between WS and COMMENT diff --git a/ydb/library/yql/minikql/jsonpath/ast_builder.cpp b/ydb/library/yql/minikql/jsonpath/ast_builder.cpp index c13a2264c1..2bb68ba236 100644 --- a/ydb/library/yql/minikql/jsonpath/ast_builder.cpp +++ b/ydb/library/yql/minikql/jsonpath/ast_builder.cpp @@ -1,482 +1,482 @@ -#include "ast_builder.h" -#include "ast_nodes.h" -#include "parse_double.h" - +#include "ast_builder.h" +#include "ast_nodes.h" +#include "parse_double.h" + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> - -#include <library/cpp/regex/hyperscan/hyperscan.h> - -#include <util/generic/singleton.h> -#include <util/system/compiler.h> -#include <util/string/cast.h> -#include <util/string/builder.h> -#include <util/charset/utf8.h> -#include <util/system/cpu_id.h> - -#include <cmath> - -using namespace NYql; -using namespace NYql::NJsonPath; -using namespace NJsonPathGenerated; -using namespace NHyperscan; - -namespace { - -TPosition GetPos(const TToken& token) { - return TPosition(token.GetColumn(), token.GetLine()); -} - -bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) { - result.clear(); - error.clear(); - - const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"'); - const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\''); - if (!doubleQuoted && !singleQuoted) { - error = "String must be quoted"; - return false; - } - if (singleQuoted && onlyDoubleQuoted) { - error = "Only double quoted strings allowed"; - return false; - } - - result = str.substr(1, str.length() - 2); - return true; -} - -} - -TAstBuilder::TAstBuilder(TIssues& issues) - : Issues(issues) -{ -} - -void TAstBuilder::Error(TPosition pos, const TStringBuf message) { - Issues.AddIssue(pos, message); - Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); -} - -TArrayAccessNode::TSubscript 
TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) { - TAstNodePtr from = BuildExpr(node.GetRule_expr1()); - TAstNodePtr to = nullptr; - if (node.HasBlock2()) { - to = BuildExpr(node.GetBlock2().GetRule_expr2()); - } - return {from, to}; -} - -TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) { - TVector<TArrayAccessNode::TSubscript> subscripts; - subscripts.reserve(1 + node.Block3Size()); - - subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2())); - for (size_t i = 0; i < node.Block3Size(); i++) { - subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2())); - } - - return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input); -} - -TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) { - return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input); -} - -TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) { - switch (node.GetAltCase()) { - case TRule_identifier::kAltIdentifier1: - return node.GetAlt_identifier1().GetToken1().GetValue(); - case TRule_identifier::kAltIdentifier2: - return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue(); - case TRule_identifier::ALT_NOT_SET: - Y_FAIL("Alternative for 'identifier' rule is not set"); - } -} - -TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) { - TString name; - const auto& nameBlock = node.GetBlock2(); - switch (nameBlock.GetAltCase()) { - case TRule_member_accessor_TBlock2::kAlt1: - name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1()); - break; - case TRule_member_accessor_TBlock2::kAlt2: { - const auto& token = nameBlock.GetAlt2().GetToken1(); - TString error; - if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) { - Error(GetPos(token), error); - return nullptr; - } - break; - } - case 
TRule_member_accessor_TBlock2::ALT_NOT_SET: - Y_FAIL("Alternative for 'member_accessor' rule is not set"); - } - - return new TMemberAccessNode(GetPos(node.GetToken1()), name, input); -} - -TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) { - const auto& token = node.GetToken2(); - return new TWildcardMemberAccessNode(GetPos(token), input); -} - -TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) { - const auto predicate = BuildExpr(node.GetRule_expr3()); - return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input); -} - -TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) { - const auto& token = node.GetToken2(); - const auto pos = GetPos(token); - const auto& value = token.GetValue(); - auto type = EMethodType::Double; - if (value == "abs") { - type = EMethodType::Abs; - } else if (value == "floor") { - type = EMethodType::Floor; - } else if (value == "ceiling") { - type = EMethodType::Ceiling; - } else if (value == "type") { - type = EMethodType::Type; - } else if (value == "size") { - type = EMethodType::Size; - } else if (value == "keyvalue") { - type = EMethodType::KeyValue; - } - - return new TMethodCallNode(pos, type, input); -} - -TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) { - switch (node.GetAltCase()) { - case TRule_accessor_op::kAltAccessorOp1: - return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input); - case TRule_accessor_op::kAltAccessorOp2: - return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input); - case TRule_accessor_op::kAltAccessorOp3: - return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input); - case TRule_accessor_op::kAltAccessorOp4: - return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input); - 
case TRule_accessor_op::kAltAccessorOp5: - return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input); - case TRule_accessor_op::kAltAccessorOp6: - return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input); - case TRule_accessor_op::ALT_NOT_SET: - Y_FAIL("Alternative for 'accessor_op' rule is not set"); - } -} - -TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) { - switch (node.GetAltCase()) { - case TRule_primary::kAltPrimary1: { - const auto& token = node.GetAlt_primary1().GetToken1(); - const auto& numberString = token.GetValue(); - const double parsedValue = ParseDouble(numberString); - if (Y_UNLIKELY(std::isnan(parsedValue))) { - Y_FAIL("Invalid number was allowed by JsonPath grammar"); - } - if (Y_UNLIKELY(std::isinf(parsedValue))) { - Error(GetPos(token), "Number literal is infinity"); - return nullptr; - } - return new TNumberLiteralNode(GetPos(token), parsedValue); - } - case TRule_primary::kAltPrimary2: { - const auto& token = node.GetAlt_primary2().GetToken1(); - return new TContextObjectNode(GetPos(token)); - } - case TRule_primary::kAltPrimary3: { - const auto& token = node.GetAlt_primary3().GetToken1(); - return new TLastArrayIndexNode(GetPos(token)); - } - case TRule_primary::kAltPrimary4: { - const auto& primary = node.GetAlt_primary4().GetBlock1(); - const auto input = BuildExpr(primary.GetRule_expr2()); - if (primary.HasBlock4()) { - const auto& token = primary.GetBlock4().GetToken1(); - return new TIsUnknownPredicateNode(GetPos(token), input); - } - return input; - } - case TRule_primary::kAltPrimary5: { - const auto& token = node.GetAlt_primary5().GetToken1(); - return new TVariableNode(GetPos(token), token.GetValue().substr(1)); - } - case TRule_primary::kAltPrimary6: { - const auto& token = node.GetAlt_primary6().GetToken1(); - return new TBooleanLiteralNode(GetPos(token), true); - } - case TRule_primary::kAltPrimary7: { - const auto& token = node.GetAlt_primary7().GetToken1(); - return new 
TBooleanLiteralNode(GetPos(token), false); - } - case TRule_primary::kAltPrimary8: { - const auto& token = node.GetAlt_primary8().GetToken1(); - return new TNullLiteralNode(GetPos(token)); - } - case TRule_primary::kAltPrimary9: { - const auto& token = node.GetAlt_primary9().GetToken1(); - TString value; - TString error; - if (!TryStringContent(token.GetValue(), value, error)) { - Error(GetPos(token), error); - return nullptr; - } - return new TStringLiteralNode(GetPos(token), value); - } - case TRule_primary::kAltPrimary10: { - const auto& token = node.GetAlt_primary10().GetToken1(); - return new TFilterObjectNode(GetPos(token)); - } - case TRule_primary::ALT_NOT_SET: - Y_FAIL("Alternative for 'primary' rule is not set"); - } -} - -TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) { - TAstNodePtr input = BuildPrimary(node.GetRule_primary1()); - for (size_t i = 0; i < node.Block2Size(); i++) { - input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input); - } - return input; -} - -TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) { - return BuildAccessorExpr(node.GetRule_accessor_expr1()); -} - -TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) { - const auto& regexToken = node.GetToken2(); - TString regex; - TString error; - if (!TryStringContent(regexToken.GetValue(), regex, error)) { - Error(GetPos(regexToken), error); - return nullptr; - } - - ui32 parsedFlags = 0; - if (node.HasBlock3()) { - TString flags; - const auto& flagsToken = node.GetBlock3().GetToken2(); - if (!TryStringContent(flagsToken.GetValue(), flags, error)) { - Error(GetPos(flagsToken), error); - return nullptr; - } - - for (char flag : flags) { - switch (flag) { - case 'i': - parsedFlags |= HS_FLAG_CASELESS; - break; - default: - Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'"); - break; - } - } - } - if (UTF8Detect(regex)) { - parsedFlags |= 
HS_FLAG_UTF8; - } - if (NX86::HaveAVX2()) { - parsedFlags |= HS_CPU_FEATURES_AVX2; - } - - TDatabase compiledRegex; - try { - compiledRegex = Compile(regex, parsedFlags); - } catch (const TCompileException& e) { - Error(GetPos(regexToken), e.AsStrBuf()); - return nullptr; - } - - return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex)); -} - -TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) { - switch (node.GetAltCase()) { - case TRule_predicate_expr::kAltPredicateExpr1: { - const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1(); - const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1()); - if (!predicate.HasBlock2()) { - return input; - } - - const auto& block = predicate.GetBlock2(); - switch (block.GetAltCase()) { - case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: { - const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1(); - const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3()); - return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix); - } - case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: { - return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input); - } - case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET: - Y_FAIL("Alternative for inner block of 'predicate_expr' rule is not set"); - } + +#include <library/cpp/regex/hyperscan/hyperscan.h> + +#include <util/generic/singleton.h> +#include <util/system/compiler.h> +#include <util/string/cast.h> +#include <util/string/builder.h> +#include <util/charset/utf8.h> +#include <util/system/cpu_id.h> + +#include <cmath> + +using namespace NYql; +using namespace NYql::NJsonPath; +using namespace NJsonPathGenerated; +using namespace NHyperscan; + +namespace { + +TPosition GetPos(const TToken& token) { + return TPosition(token.GetColumn(), token.GetLine()); +} + +bool TryStringContent(const TString& str, TString& result, TString& error, 
bool onlyDoubleQuoted = true) { + result.clear(); + error.clear(); + + const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"'); + const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\''); + if (!doubleQuoted && !singleQuoted) { + error = "String must be quoted"; + return false; + } + if (singleQuoted && onlyDoubleQuoted) { + error = "Only double quoted strings allowed"; + return false; + } + + result = str.substr(1, str.length() - 2); + return true; +} + +} + +TAstBuilder::TAstBuilder(TIssues& issues) + : Issues(issues) +{ +} + +void TAstBuilder::Error(TPosition pos, const TStringBuf message) { + Issues.AddIssue(pos, message); + Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); +} + +TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) { + TAstNodePtr from = BuildExpr(node.GetRule_expr1()); + TAstNodePtr to = nullptr; + if (node.HasBlock2()) { + to = BuildExpr(node.GetBlock2().GetRule_expr2()); + } + return {from, to}; +} + +TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) { + TVector<TArrayAccessNode::TSubscript> subscripts; + subscripts.reserve(1 + node.Block3Size()); + + subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2())); + for (size_t i = 0; i < node.Block3Size(); i++) { + subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2())); + } + + return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input); +} + +TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) { + return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input); +} + +TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) { + switch (node.GetAltCase()) { + case TRule_identifier::kAltIdentifier1: + return node.GetAlt_identifier1().GetToken1().GetValue(); + case TRule_identifier::kAltIdentifier2: + return 
node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue(); + case TRule_identifier::ALT_NOT_SET: + Y_FAIL("Alternative for 'identifier' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) { + TString name; + const auto& nameBlock = node.GetBlock2(); + switch (nameBlock.GetAltCase()) { + case TRule_member_accessor_TBlock2::kAlt1: + name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1()); + break; + case TRule_member_accessor_TBlock2::kAlt2: { + const auto& token = nameBlock.GetAlt2().GetToken1(); + TString error; + if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) { + Error(GetPos(token), error); + return nullptr; + } + break; + } + case TRule_member_accessor_TBlock2::ALT_NOT_SET: + Y_FAIL("Alternative for 'member_accessor' rule is not set"); + } + + return new TMemberAccessNode(GetPos(node.GetToken1()), name, input); +} + +TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) { + const auto& token = node.GetToken2(); + return new TWildcardMemberAccessNode(GetPos(token), input); +} + +TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) { + const auto predicate = BuildExpr(node.GetRule_expr3()); + return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input); +} + +TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) { + const auto& token = node.GetToken2(); + const auto pos = GetPos(token); + const auto& value = token.GetValue(); + auto type = EMethodType::Double; + if (value == "abs") { + type = EMethodType::Abs; + } else if (value == "floor") { + type = EMethodType::Floor; + } else if (value == "ceiling") { + type = EMethodType::Ceiling; + } else if (value == "type") { + type = EMethodType::Type; + } else if (value == "size") { + type = EMethodType::Size; + } else if (value == "keyvalue") { + type = 
EMethodType::KeyValue; + } + + return new TMethodCallNode(pos, type, input); +} + +TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) { + switch (node.GetAltCase()) { + case TRule_accessor_op::kAltAccessorOp1: + return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp2: + return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp3: + return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp4: + return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input); + case TRule_accessor_op::kAltAccessorOp5: + return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input); + case TRule_accessor_op::kAltAccessorOp6: + return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input); + case TRule_accessor_op::ALT_NOT_SET: + Y_FAIL("Alternative for 'accessor_op' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) { + switch (node.GetAltCase()) { + case TRule_primary::kAltPrimary1: { + const auto& token = node.GetAlt_primary1().GetToken1(); + const auto& numberString = token.GetValue(); + const double parsedValue = ParseDouble(numberString); + if (Y_UNLIKELY(std::isnan(parsedValue))) { + Y_FAIL("Invalid number was allowed by JsonPath grammar"); + } + if (Y_UNLIKELY(std::isinf(parsedValue))) { + Error(GetPos(token), "Number literal is infinity"); + return nullptr; + } + return new TNumberLiteralNode(GetPos(token), parsedValue); + } + case TRule_primary::kAltPrimary2: { + const auto& token = node.GetAlt_primary2().GetToken1(); + return new TContextObjectNode(GetPos(token)); + } + case TRule_primary::kAltPrimary3: { + const auto& token = node.GetAlt_primary3().GetToken1(); + return new 
TLastArrayIndexNode(GetPos(token)); + } + case TRule_primary::kAltPrimary4: { + const auto& primary = node.GetAlt_primary4().GetBlock1(); + const auto input = BuildExpr(primary.GetRule_expr2()); + if (primary.HasBlock4()) { + const auto& token = primary.GetBlock4().GetToken1(); + return new TIsUnknownPredicateNode(GetPos(token), input); + } + return input; + } + case TRule_primary::kAltPrimary5: { + const auto& token = node.GetAlt_primary5().GetToken1(); + return new TVariableNode(GetPos(token), token.GetValue().substr(1)); + } + case TRule_primary::kAltPrimary6: { + const auto& token = node.GetAlt_primary6().GetToken1(); + return new TBooleanLiteralNode(GetPos(token), true); + } + case TRule_primary::kAltPrimary7: { + const auto& token = node.GetAlt_primary7().GetToken1(); + return new TBooleanLiteralNode(GetPos(token), false); + } + case TRule_primary::kAltPrimary8: { + const auto& token = node.GetAlt_primary8().GetToken1(); + return new TNullLiteralNode(GetPos(token)); + } + case TRule_primary::kAltPrimary9: { + const auto& token = node.GetAlt_primary9().GetToken1(); + TString value; + TString error; + if (!TryStringContent(token.GetValue(), value, error)) { + Error(GetPos(token), error); + return nullptr; + } + return new TStringLiteralNode(GetPos(token), value); + } + case TRule_primary::kAltPrimary10: { + const auto& token = node.GetAlt_primary10().GetToken1(); + return new TFilterObjectNode(GetPos(token)); + } + case TRule_primary::ALT_NOT_SET: + Y_FAIL("Alternative for 'primary' rule is not set"); + } +} + +TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) { + TAstNodePtr input = BuildPrimary(node.GetRule_primary1()); + for (size_t i = 0; i < node.Block2Size(); i++) { + input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input); + } + return input; +} + +TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) { + return BuildAccessorExpr(node.GetRule_accessor_expr1()); +} + +TAstNodePtr 
TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) { + const auto& regexToken = node.GetToken2(); + TString regex; + TString error; + if (!TryStringContent(regexToken.GetValue(), regex, error)) { + Error(GetPos(regexToken), error); + return nullptr; + } + + ui32 parsedFlags = 0; + if (node.HasBlock3()) { + TString flags; + const auto& flagsToken = node.GetBlock3().GetToken2(); + if (!TryStringContent(flagsToken.GetValue(), flags, error)) { + Error(GetPos(flagsToken), error); + return nullptr; + } + + for (char flag : flags) { + switch (flag) { + case 'i': + parsedFlags |= HS_FLAG_CASELESS; + break; + default: + Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'"); + break; + } + } + } + if (UTF8Detect(regex)) { + parsedFlags |= HS_FLAG_UTF8; + } + if (NX86::HaveAVX2()) { + parsedFlags |= HS_CPU_FEATURES_AVX2; + } + + TDatabase compiledRegex; + try { + compiledRegex = Compile(regex, parsedFlags); + } catch (const TCompileException& e) { + Error(GetPos(regexToken), e.AsStrBuf()); + return nullptr; + } + + return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex)); +} + +TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) { + switch (node.GetAltCase()) { + case TRule_predicate_expr::kAltPredicateExpr1: { + const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1(); + const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1()); + if (!predicate.HasBlock2()) { + return input; + } + + const auto& block = predicate.GetBlock2(); + switch (block.GetAltCase()) { + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: { + const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1(); + const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3()); + return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix); + } + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: { + return 
BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input); + } + case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET: + Y_FAIL("Alternative for inner block of 'predicate_expr' rule is not set"); + } Y_UNREACHABLE(); - } - case TRule_predicate_expr::kAltPredicateExpr2: { - const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1(); - const auto input = BuildExpr(predicate.GetRule_expr3()); - return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input); - } - case TRule_predicate_expr::ALT_NOT_SET: - Y_FAIL("Alternative for 'predicate' rule is not set"); - } + } + case TRule_predicate_expr::kAltPredicateExpr2: { + const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1(); + const auto input = BuildExpr(predicate.GetRule_expr3()); + return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input); + } + case TRule_predicate_expr::ALT_NOT_SET: + Y_FAIL("Alternative for 'predicate' rule is not set"); + } Y_UNREACHABLE(); -} - -TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) { - const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2()); - if (!node.HasBlock1()) { - return predicateExpr; - } - - const auto& opToken = node.GetBlock1().GetToken1(); - const auto& opValue = opToken.GetValue(); - auto operation = EUnaryOperation::Plus; - if (opValue == "-") { - operation = EUnaryOperation::Minus; - } else if (opValue == "!") { - operation = EUnaryOperation::Not; - } - return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr); -} - -TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) { - TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1()); - - for (size_t i = 0; i < node.Block2Size(); i++) { - const auto& block = node.GetBlock2(i); - - const auto& opToken = block.GetToken1(); - const auto& opValue = opToken.GetValue(); - auto operation = EBinaryOperation::Multiply; - if (opValue == "/") { - operation = EBinaryOperation::Divide; - } else if 
(opValue == "%") { - operation = EBinaryOperation::Modulo; - } - - const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) { - TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1()); - - for (size_t i = 0; i < node.Block2Size(); i++) { - const auto& block = node.GetBlock2(i); - - const auto& opToken = block.GetToken1(); - auto operation = EBinaryOperation::Add; - if (opToken.GetValue() == "-") { - operation = EBinaryOperation::Substract; - } - - const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) { - TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1()); - - if (node.HasBlock2()) { - const auto& block = node.GetBlock2(); - - const auto& opToken = block.GetToken1(); - const auto& opValue = opToken.GetValue(); - auto operation = EBinaryOperation::Less; - if (opValue == "<=") { - operation = EBinaryOperation::LessEqual; - } else if (opValue == ">") { - operation = EBinaryOperation::Greater; - } else if (opValue == ">=") { - operation = EBinaryOperation::GreaterEqual; - } - - const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) { - TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1()); - - if (node.HasBlock2()) { - const auto& block = node.GetBlock2(); - - const auto& opToken = block.GetToken1(); - const auto& opValue = opToken.GetValue(); - auto operation = EBinaryOperation::Equal; - if (opValue == "<>" || opValue == "!=") { - operation = 
EBinaryOperation::NotEqual; - } - - const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) { - TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1()); - - for (size_t i = 0; i < node.Block2Size(); i++) { - const auto& block = node.GetBlock2(i); - - const auto& opToken = block.GetToken1(); - const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) { - TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1()); - - for (size_t i = 0; i < node.Block2Size(); i++) { - const auto& block = node.GetBlock2(i); - - const auto& opToken = block.GetToken1(); - const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2()); - result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand); - } - - return result; -} - -TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) { - return BuildOrExpr(node.GetRule_or_expr1()); -} - -TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) { - TPosition pos; - auto mode = EJsonPathMode::Lax; - if (node.HasBlock1()) { - const auto& modeToken = node.GetBlock1().GetToken1(); - pos = GetPos(modeToken); - if (modeToken.GetValue() == "strict") { - mode = EJsonPathMode::Strict; - } - } - - const auto expr = BuildExpr(node.GetRule_expr2()); - return new TRootNode(pos, expr, mode); -} - -TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) { - return BuildJsonPath(ast.GetRule_jsonpath()); -} +} + +TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) { + const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2()); + if (!node.HasBlock1()) { + 
return predicateExpr; + } + + const auto& opToken = node.GetBlock1().GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EUnaryOperation::Plus; + if (opValue == "-") { + operation = EUnaryOperation::Minus; + } else if (opValue == "!") { + operation = EUnaryOperation::Not; + } + return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr); +} + +TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) { + TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Multiply; + if (opValue == "/") { + operation = EBinaryOperation::Divide; + } else if (opValue == "%") { + operation = EBinaryOperation::Modulo; + } + + const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) { + TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + auto operation = EBinaryOperation::Add; + if (opToken.GetValue() == "-") { + operation = EBinaryOperation::Substract; + } + + const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) { + TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1()); + + if (node.HasBlock2()) { + const auto& block = node.GetBlock2(); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Less; 
+ if (opValue == "<=") { + operation = EBinaryOperation::LessEqual; + } else if (opValue == ">") { + operation = EBinaryOperation::Greater; + } else if (opValue == ">=") { + operation = EBinaryOperation::GreaterEqual; + } + + const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) { + TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1()); + + if (node.HasBlock2()) { + const auto& block = node.GetBlock2(); + + const auto& opToken = block.GetToken1(); + const auto& opValue = opToken.GetValue(); + auto operation = EBinaryOperation::Equal; + if (opValue == "<>" || opValue == "!=") { + operation = EBinaryOperation::NotEqual; + } + + const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) { + TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand); + } + + return result; +} + +TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) { + TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1()); + + for (size_t i = 0; i < node.Block2Size(); i++) { + const auto& block = node.GetBlock2(i); + + const auto& opToken = block.GetToken1(); + const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2()); + result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand); + } + + return result; +} + +TAstNodePtr 
TAstBuilder::BuildExpr(const TRule_expr& node) { + return BuildOrExpr(node.GetRule_or_expr1()); +} + +TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) { + TPosition pos; + auto mode = EJsonPathMode::Lax; + if (node.HasBlock1()) { + const auto& modeToken = node.GetBlock1().GetToken1(); + pos = GetPos(modeToken); + if (modeToken.GetValue() == "strict") { + mode = EJsonPathMode::Strict; + } + } + + const auto expr = BuildExpr(node.GetRule_expr2()); + return new TRootNode(pos, expr, mode); +} + +TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) { + return BuildJsonPath(ast.GetRule_jsonpath()); +} diff --git a/ydb/library/yql/minikql/jsonpath/ast_builder.h b/ydb/library/yql/minikql/jsonpath/ast_builder.h index 4d6331dea7..0e6f92a8a4 100644 --- a/ydb/library/yql/minikql/jsonpath/ast_builder.h +++ b/ydb/library/yql/minikql/jsonpath/ast_builder.h @@ -1,52 +1,52 @@ -#pragma once - -#include "ast_nodes.h" - +#pragma once + +#include "ast_nodes.h" + #include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h> - -namespace NYql::NJsonPath { - -class TAstBuilder { -public: - TAstBuilder(TIssues& issues); - - TAstNodePtr Build(const NJsonPathGenerated::TJsonPathParserAST& ast); - -private: - TArrayAccessNode::TSubscript BuildArraySubscript(const NJsonPathGenerated::TRule_array_subscript& node); - TAstNodePtr BuildArrayAccessor(const NJsonPathGenerated::TRule_array_accessor& node, TAstNodePtr input); - TAstNodePtr BuildWildcardArrayAccessor(const NJsonPathGenerated::TRule_wildcard_array_accessor& node, TAstNodePtr input); - - TString BuildIdentifier(const NJsonPathGenerated::TRule_identifier& node); - TAstNodePtr BuildMemberAccessor(const NJsonPathGenerated::TRule_member_accessor& node, TAstNodePtr input); - TAstNodePtr BuildWildcardMemberAccessor(const NJsonPathGenerated::TRule_wildcard_member_accessor& node, TAstNodePtr input); - - TAstNodePtr BuildFilter(const NJsonPathGenerated::TRule_filter& node, TAstNodePtr input); - - 
TAstNodePtr BuildMethod(const NJsonPathGenerated::TRule_method& node, TAstNodePtr input); - - TAstNodePtr BuildAccessorOp(const NJsonPathGenerated::TRule_accessor_op& node, TAstNodePtr input); - TAstNodePtr BuildAccessorExpr(const NJsonPathGenerated::TRule_accessor_expr& node); - - TAstNodePtr BuildPrimary(const NJsonPathGenerated::TRule_primary& node); - - TAstNodePtr BuildPlainExpr(const NJsonPathGenerated::TRule_plain_expr& node); - TAstNodePtr BuildLikeRegexExpr(const NJsonPathGenerated::TRule_like_regex_expr& node, TAstNodePtr input); - TAstNodePtr BuildPredicateExpr(const NJsonPathGenerated::TRule_predicate_expr& node); - TAstNodePtr BuildUnaryExpr(const NJsonPathGenerated::TRule_unary_expr& node); - TAstNodePtr BuildMulExpr(const NJsonPathGenerated::TRule_mul_expr& node); - TAstNodePtr BuildAddExpr(const NJsonPathGenerated::TRule_add_expr& node); - TAstNodePtr BuildCompareExpr(const NJsonPathGenerated::TRule_compare_expr& node); - TAstNodePtr BuildEqualExpr(const NJsonPathGenerated::TRule_equal_expr& node); - TAstNodePtr BuildAndExpr(const NJsonPathGenerated::TRule_and_expr& node); - TAstNodePtr BuildOrExpr(const NJsonPathGenerated::TRule_or_expr& node); - - TAstNodePtr BuildExpr(const NJsonPathGenerated::TRule_expr& node); - TAstNodePtr BuildJsonPath(const NJsonPathGenerated::TRule_jsonpath& node); - - void Error(TPosition pos, const TStringBuf message); - - TIssues& Issues; -}; - -} + +namespace NYql::NJsonPath { + +class TAstBuilder { +public: + TAstBuilder(TIssues& issues); + + TAstNodePtr Build(const NJsonPathGenerated::TJsonPathParserAST& ast); + +private: + TArrayAccessNode::TSubscript BuildArraySubscript(const NJsonPathGenerated::TRule_array_subscript& node); + TAstNodePtr BuildArrayAccessor(const NJsonPathGenerated::TRule_array_accessor& node, TAstNodePtr input); + TAstNodePtr BuildWildcardArrayAccessor(const NJsonPathGenerated::TRule_wildcard_array_accessor& node, TAstNodePtr input); + + TString BuildIdentifier(const 
NJsonPathGenerated::TRule_identifier& node); + TAstNodePtr BuildMemberAccessor(const NJsonPathGenerated::TRule_member_accessor& node, TAstNodePtr input); + TAstNodePtr BuildWildcardMemberAccessor(const NJsonPathGenerated::TRule_wildcard_member_accessor& node, TAstNodePtr input); + + TAstNodePtr BuildFilter(const NJsonPathGenerated::TRule_filter& node, TAstNodePtr input); + + TAstNodePtr BuildMethod(const NJsonPathGenerated::TRule_method& node, TAstNodePtr input); + + TAstNodePtr BuildAccessorOp(const NJsonPathGenerated::TRule_accessor_op& node, TAstNodePtr input); + TAstNodePtr BuildAccessorExpr(const NJsonPathGenerated::TRule_accessor_expr& node); + + TAstNodePtr BuildPrimary(const NJsonPathGenerated::TRule_primary& node); + + TAstNodePtr BuildPlainExpr(const NJsonPathGenerated::TRule_plain_expr& node); + TAstNodePtr BuildLikeRegexExpr(const NJsonPathGenerated::TRule_like_regex_expr& node, TAstNodePtr input); + TAstNodePtr BuildPredicateExpr(const NJsonPathGenerated::TRule_predicate_expr& node); + TAstNodePtr BuildUnaryExpr(const NJsonPathGenerated::TRule_unary_expr& node); + TAstNodePtr BuildMulExpr(const NJsonPathGenerated::TRule_mul_expr& node); + TAstNodePtr BuildAddExpr(const NJsonPathGenerated::TRule_add_expr& node); + TAstNodePtr BuildCompareExpr(const NJsonPathGenerated::TRule_compare_expr& node); + TAstNodePtr BuildEqualExpr(const NJsonPathGenerated::TRule_equal_expr& node); + TAstNodePtr BuildAndExpr(const NJsonPathGenerated::TRule_and_expr& node); + TAstNodePtr BuildOrExpr(const NJsonPathGenerated::TRule_or_expr& node); + + TAstNodePtr BuildExpr(const NJsonPathGenerated::TRule_expr& node); + TAstNodePtr BuildJsonPath(const NJsonPathGenerated::TRule_jsonpath& node); + + void Error(TPosition pos, const TStringBuf message); + + TIssues& Issues; +}; + +} diff --git a/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp b/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp index 0e7dee2e22..147a06c49a 100644 --- a/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp +++ 
b/ydb/library/yql/minikql/jsonpath/ast_nodes.cpp @@ -1,383 +1,383 @@ -#include "ast_nodes.h" - -namespace NYql::NJsonPath { - -TAstNode::TAstNode(TPosition pos) - : Pos(pos) -{ -} - -TPosition TAstNode::GetPos() const { - return Pos; -} - -EReturnType TAstNode::GetReturnType() const { - return EReturnType::Any; -} - -TRootNode::TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode) - : TAstNode(pos) - , Expr(expr) - , Mode(mode) -{ -} - -const TAstNodePtr TRootNode::GetExpr() const { - return Expr; -} - -EJsonPathMode TRootNode::GetMode() const { - return Mode; -} - -void TRootNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitRoot(*this); -} - -EReturnType TRootNode::GetReturnType() const { - return Expr->GetReturnType(); -} - -TContextObjectNode::TContextObjectNode(TPosition pos) - : TAstNode(pos) -{ -} - -void TContextObjectNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitContextObject(*this); -} - -TVariableNode::TVariableNode(TPosition pos, const TString& name) - : TAstNode(pos) - , Name(name) -{ -} - -const TString& TVariableNode::GetName() const { - return Name; -} - -void TVariableNode::Accept(IAstNodeVisitor& visitor) const { - visitor.VisitVariable(*this); -} - -TLastArrayIndexNode::TLastArrayIndexNode(TPosition pos) - : TAstNode(pos) -{ -} - -void TLastArrayIndexNode::Accept(IAstNodeVisitor& visitor) const { - visitor.VisitLastArrayIndex(*this); -} - -TNumberLiteralNode::TNumberLiteralNode(TPosition pos, double value) - : TAstNode(pos) - , Value(value) -{ -} - -double TNumberLiteralNode::GetValue() const { - return Value; -} - -void TNumberLiteralNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitNumberLiteral(*this); -} - -TMemberAccessNode::TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input) - : TAstNode(pos) - , Member(member) - , Input(input) -{ -} - -const TStringBuf TMemberAccessNode::GetMember() const { - return Member; -} - -const TAstNodePtr 
TMemberAccessNode::GetInput() const { - return Input; -} - -void TMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitMemberAccess(*this); -} - -TWildcardMemberAccessNode::TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input) - : TAstNode(pos) - , Input(input) -{ -} - -const TAstNodePtr TWildcardMemberAccessNode::GetInput() const { - return Input; -} - -void TWildcardMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitWildcardMemberAccess(*this); -} - -TArrayAccessNode::TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input) - : TAstNode(pos) - , Subscripts(subscripts) - , Input(input) -{ -} - -const TVector<TArrayAccessNode::TSubscript>& TArrayAccessNode::GetSubscripts() const { - return Subscripts; -} - -const TAstNodePtr TArrayAccessNode::GetInput() const { - return Input; -} - -void TArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitArrayAccess(*this); -} - -TWildcardArrayAccessNode::TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input) - : TAstNode(pos) - , Input(input) -{ -} - -const TAstNodePtr TWildcardArrayAccessNode::GetInput() const { - return Input; -} - -void TWildcardArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitWildcardArrayAccess(*this); -} - -TUnaryOperationNode::TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr) - : TAstNode(pos) - , Operation(op) - , Expr(expr) -{ -} - -EUnaryOperation TUnaryOperationNode::GetOp() const { - return Operation; -} - -const TAstNodePtr TUnaryOperationNode::GetExpr() const { - return Expr; -} - -void TUnaryOperationNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitUnaryOperation(*this); -} - -EReturnType TUnaryOperationNode::GetReturnType() const { - return Operation == EUnaryOperation::Not ? 
EReturnType::Bool : EReturnType::Any; -} - -TBinaryOperationNode::TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr) - : TAstNode(pos) - , Operation(op) - , LeftExpr(leftExpr) - , RightExpr(rightExpr) -{ -} - -EBinaryOperation TBinaryOperationNode::GetOp() const { - return Operation; -} - -const TAstNodePtr TBinaryOperationNode::GetLeftExpr() const { - return LeftExpr; -} - -const TAstNodePtr TBinaryOperationNode::GetRightExpr() const { - return RightExpr; -} - -void TBinaryOperationNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitBinaryOperation(*this); -} - -EReturnType TBinaryOperationNode::GetReturnType() const { - switch (Operation) { - case EBinaryOperation::Less: - case EBinaryOperation::LessEqual: - case EBinaryOperation::Greater: - case EBinaryOperation::GreaterEqual: - case EBinaryOperation::Equal: - case EBinaryOperation::NotEqual: - case EBinaryOperation::And: - case EBinaryOperation::Or: - return EReturnType::Bool; - - default: - return EReturnType::Any; - } -} - -TBooleanLiteralNode::TBooleanLiteralNode(TPosition pos, bool value) - : TAstNode(pos) - , Value(value) -{ -} - -bool TBooleanLiteralNode::GetValue() const { - return Value; -} - -void TBooleanLiteralNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitBooleanLiteral(*this); -} - -TNullLiteralNode::TNullLiteralNode(TPosition pos) - : TAstNode(pos) -{ -} - -void TNullLiteralNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitNullLiteral(*this); -} - -TStringLiteralNode::TStringLiteralNode(TPosition pos, const TString& value) - : TAstNode(pos) - , Value(value) -{ -} - -const TString& TStringLiteralNode::GetValue() const { - return Value; -} - -void TStringLiteralNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitStringLiteral(*this); -} - -TFilterObjectNode::TFilterObjectNode(TPosition pos) - : TAstNode(pos) -{ -} - -void TFilterObjectNode::Accept(IAstNodeVisitor& visitor) 
const { - return visitor.VisitFilterObject(*this); -} - -TFilterPredicateNode::TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input) - : TAstNode(pos) - , Predicate(predicate) - , Input(input) -{ -} - -const TAstNodePtr TFilterPredicateNode::GetPredicate() const { - return Predicate; -} - -const TAstNodePtr TFilterPredicateNode::GetInput() const { - return Input; -} - -void TFilterPredicateNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitFilterPredicate(*this); -} - -TMethodCallNode::TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input) - : TAstNode(pos) - , Type(type) - , Input(input) -{ -} - -EMethodType TMethodCallNode::GetType() const { - return Type; -} - -const TAstNodePtr TMethodCallNode::GetInput() const { - return Input; -} - -void TMethodCallNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitMethodCall(*this); -} - -TStartsWithPredicateNode::TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix) - : TAstNode(pos) - , Input(input) - , Prefix(prefix) -{ -} - -const TAstNodePtr TStartsWithPredicateNode::GetInput() const { - return Input; -} - -const TAstNodePtr TStartsWithPredicateNode::GetPrefix() const { - return Prefix; -} - -EReturnType TStartsWithPredicateNode::GetReturnType() const { - return EReturnType::Bool; -} - -void TStartsWithPredicateNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitStartsWithPredicate(*this); -} - -TExistsPredicateNode::TExistsPredicateNode(TPosition pos, TAstNodePtr input) - : TAstNode(pos) - , Input(input) -{ -} - -const TAstNodePtr TExistsPredicateNode::GetInput() const { - return Input; -} - -EReturnType TExistsPredicateNode::GetReturnType() const { - return EReturnType::Bool; -} - -void TExistsPredicateNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitExistsPredicate(*this); -} - -TIsUnknownPredicateNode::TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input) - : TAstNode(pos) - , 
Input(input) -{ -} - -const TAstNodePtr TIsUnknownPredicateNode::GetInput() const { - return Input; -} - -EReturnType TIsUnknownPredicateNode::GetReturnType() const { - return EReturnType::Bool; -} - -void TIsUnknownPredicateNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitIsUnknownPredicate(*this); -} - -TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex) - : TAstNode(pos) - , Input(input) - , Regex(std::move(regex)) -{ -} - -const TAstNodePtr TLikeRegexPredicateNode::GetInput() const { - return Input; -} - -const NHyperscan::TDatabase& TLikeRegexPredicateNode::GetRegex() const { - return Regex; -} - -EReturnType TLikeRegexPredicateNode::GetReturnType() const { - return EReturnType::Bool; -} - -void TLikeRegexPredicateNode::Accept(IAstNodeVisitor& visitor) const { - return visitor.VisitLikeRegexPredicate(*this); -} - -}
\ No newline at end of file +#include "ast_nodes.h" + +namespace NYql::NJsonPath { + +TAstNode::TAstNode(TPosition pos) + : Pos(pos) +{ +} + +TPosition TAstNode::GetPos() const { + return Pos; +} + +EReturnType TAstNode::GetReturnType() const { + return EReturnType::Any; +} + +TRootNode::TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode) + : TAstNode(pos) + , Expr(expr) + , Mode(mode) +{ +} + +const TAstNodePtr TRootNode::GetExpr() const { + return Expr; +} + +EJsonPathMode TRootNode::GetMode() const { + return Mode; +} + +void TRootNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitRoot(*this); +} + +EReturnType TRootNode::GetReturnType() const { + return Expr->GetReturnType(); +} + +TContextObjectNode::TContextObjectNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TContextObjectNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitContextObject(*this); +} + +TVariableNode::TVariableNode(TPosition pos, const TString& name) + : TAstNode(pos) + , Name(name) +{ +} + +const TString& TVariableNode::GetName() const { + return Name; +} + +void TVariableNode::Accept(IAstNodeVisitor& visitor) const { + visitor.VisitVariable(*this); +} + +TLastArrayIndexNode::TLastArrayIndexNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TLastArrayIndexNode::Accept(IAstNodeVisitor& visitor) const { + visitor.VisitLastArrayIndex(*this); +} + +TNumberLiteralNode::TNumberLiteralNode(TPosition pos, double value) + : TAstNode(pos) + , Value(value) +{ +} + +double TNumberLiteralNode::GetValue() const { + return Value; +} + +void TNumberLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitNumberLiteral(*this); +} + +TMemberAccessNode::TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input) + : TAstNode(pos) + , Member(member) + , Input(input) +{ +} + +const TStringBuf TMemberAccessNode::GetMember() const { + return Member; +} + +const TAstNodePtr TMemberAccessNode::GetInput() const { + return Input; +} + 
+void TMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitMemberAccess(*this); +} + +TWildcardMemberAccessNode::TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TWildcardMemberAccessNode::GetInput() const { + return Input; +} + +void TWildcardMemberAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitWildcardMemberAccess(*this); +} + +TArrayAccessNode::TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input) + : TAstNode(pos) + , Subscripts(subscripts) + , Input(input) +{ +} + +const TVector<TArrayAccessNode::TSubscript>& TArrayAccessNode::GetSubscripts() const { + return Subscripts; +} + +const TAstNodePtr TArrayAccessNode::GetInput() const { + return Input; +} + +void TArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitArrayAccess(*this); +} + +TWildcardArrayAccessNode::TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TWildcardArrayAccessNode::GetInput() const { + return Input; +} + +void TWildcardArrayAccessNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitWildcardArrayAccess(*this); +} + +TUnaryOperationNode::TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr) + : TAstNode(pos) + , Operation(op) + , Expr(expr) +{ +} + +EUnaryOperation TUnaryOperationNode::GetOp() const { + return Operation; +} + +const TAstNodePtr TUnaryOperationNode::GetExpr() const { + return Expr; +} + +void TUnaryOperationNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitUnaryOperation(*this); +} + +EReturnType TUnaryOperationNode::GetReturnType() const { + return Operation == EUnaryOperation::Not ? 
EReturnType::Bool : EReturnType::Any; +} + +TBinaryOperationNode::TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr) + : TAstNode(pos) + , Operation(op) + , LeftExpr(leftExpr) + , RightExpr(rightExpr) +{ +} + +EBinaryOperation TBinaryOperationNode::GetOp() const { + return Operation; +} + +const TAstNodePtr TBinaryOperationNode::GetLeftExpr() const { + return LeftExpr; +} + +const TAstNodePtr TBinaryOperationNode::GetRightExpr() const { + return RightExpr; +} + +void TBinaryOperationNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitBinaryOperation(*this); +} + +EReturnType TBinaryOperationNode::GetReturnType() const { + switch (Operation) { + case EBinaryOperation::Less: + case EBinaryOperation::LessEqual: + case EBinaryOperation::Greater: + case EBinaryOperation::GreaterEqual: + case EBinaryOperation::Equal: + case EBinaryOperation::NotEqual: + case EBinaryOperation::And: + case EBinaryOperation::Or: + return EReturnType::Bool; + + default: + return EReturnType::Any; + } +} + +TBooleanLiteralNode::TBooleanLiteralNode(TPosition pos, bool value) + : TAstNode(pos) + , Value(value) +{ +} + +bool TBooleanLiteralNode::GetValue() const { + return Value; +} + +void TBooleanLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitBooleanLiteral(*this); +} + +TNullLiteralNode::TNullLiteralNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TNullLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitNullLiteral(*this); +} + +TStringLiteralNode::TStringLiteralNode(TPosition pos, const TString& value) + : TAstNode(pos) + , Value(value) +{ +} + +const TString& TStringLiteralNode::GetValue() const { + return Value; +} + +void TStringLiteralNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitStringLiteral(*this); +} + +TFilterObjectNode::TFilterObjectNode(TPosition pos) + : TAstNode(pos) +{ +} + +void TFilterObjectNode::Accept(IAstNodeVisitor& visitor) 
const { + return visitor.VisitFilterObject(*this); +} + +TFilterPredicateNode::TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input) + : TAstNode(pos) + , Predicate(predicate) + , Input(input) +{ +} + +const TAstNodePtr TFilterPredicateNode::GetPredicate() const { + return Predicate; +} + +const TAstNodePtr TFilterPredicateNode::GetInput() const { + return Input; +} + +void TFilterPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitFilterPredicate(*this); +} + +TMethodCallNode::TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input) + : TAstNode(pos) + , Type(type) + , Input(input) +{ +} + +EMethodType TMethodCallNode::GetType() const { + return Type; +} + +const TAstNodePtr TMethodCallNode::GetInput() const { + return Input; +} + +void TMethodCallNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitMethodCall(*this); +} + +TStartsWithPredicateNode::TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix) + : TAstNode(pos) + , Input(input) + , Prefix(prefix) +{ +} + +const TAstNodePtr TStartsWithPredicateNode::GetInput() const { + return Input; +} + +const TAstNodePtr TStartsWithPredicateNode::GetPrefix() const { + return Prefix; +} + +EReturnType TStartsWithPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TStartsWithPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitStartsWithPredicate(*this); +} + +TExistsPredicateNode::TExistsPredicateNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , Input(input) +{ +} + +const TAstNodePtr TExistsPredicateNode::GetInput() const { + return Input; +} + +EReturnType TExistsPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TExistsPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitExistsPredicate(*this); +} + +TIsUnknownPredicateNode::TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input) + : TAstNode(pos) + , 
Input(input) +{ +} + +const TAstNodePtr TIsUnknownPredicateNode::GetInput() const { + return Input; +} + +EReturnType TIsUnknownPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TIsUnknownPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitIsUnknownPredicate(*this); +} + +TLikeRegexPredicateNode::TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex) + : TAstNode(pos) + , Input(input) + , Regex(std::move(regex)) +{ +} + +const TAstNodePtr TLikeRegexPredicateNode::GetInput() const { + return Input; +} + +const NHyperscan::TDatabase& TLikeRegexPredicateNode::GetRegex() const { + return Regex; +} + +EReturnType TLikeRegexPredicateNode::GetReturnType() const { + return EReturnType::Bool; +} + +void TLikeRegexPredicateNode::Accept(IAstNodeVisitor& visitor) const { + return visitor.VisitLikeRegexPredicate(*this); +} + +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/ast_nodes.h b/ydb/library/yql/minikql/jsonpath/ast_nodes.h index f9129ef1dd..30da1e5416 100644 --- a/ydb/library/yql/minikql/jsonpath/ast_nodes.h +++ b/ydb/library/yql/minikql/jsonpath/ast_nodes.h @@ -1,401 +1,401 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/public/issue/yql_issue.h> - + #include <library/cpp/json/json_value.h> -#include <library/cpp/regex/hyperscan/hyperscan.h> - -namespace NYql::NJsonPath { - -class TRootNode; -class TContextObjectNode; -class TVariableNode; -class TLastArrayIndexNode; -class TNumberLiteralNode; -class TAccessorExprNode; -class TMemberAccessNode; -class TWildcardMemberAccessNode; -class TArrayAccessNode; -class TWildcardArrayAccessNode; -class TUnaryOperationNode; -class TBinaryOperationNode; -class TBooleanLiteralNode; -class TNullLiteralNode; -class TStringLiteralNode; -class TFilterObjectNode; -class TFilterPredicateNode; -class TMethodCallNode; -class TStartsWithPredicateNode; -class TExistsPredicateNode; -class TIsUnknownPredicateNode; -class TLikeRegexPredicateNode; - -enum class EJsonPathMode { - Lax = 0, - Strict = 1, -}; - -class IAstNodeVisitor { -public: - virtual void VisitRoot(const TRootNode& node) = 0; - virtual void VisitContextObject(const TContextObjectNode& node) = 0; - virtual void VisitVariable(const TVariableNode& node) = 0; - virtual void VisitLastArrayIndex(const TLastArrayIndexNode& node) = 0; - virtual void VisitNumberLiteral(const TNumberLiteralNode& node) = 0; - virtual void VisitMemberAccess(const TMemberAccessNode& node) = 0; - virtual void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) = 0; - virtual void VisitArrayAccess(const TArrayAccessNode& node) = 0; - virtual void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) = 0; - virtual void VisitUnaryOperation(const TUnaryOperationNode& node) = 0; - virtual void VisitBinaryOperation(const TBinaryOperationNode& node) = 0; - virtual 
void VisitBooleanLiteral(const TBooleanLiteralNode& node) = 0; - virtual void VisitNullLiteral(const TNullLiteralNode& node) = 0; - virtual void VisitStringLiteral(const TStringLiteralNode& node) = 0; - virtual void VisitFilterObject(const TFilterObjectNode& node) = 0; - virtual void VisitFilterPredicate(const TFilterPredicateNode& node) = 0; - virtual void VisitMethodCall(const TMethodCallNode& node) = 0; - virtual void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) = 0; - virtual void VisitExistsPredicate(const TExistsPredicateNode& node) = 0; - virtual void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) = 0; - virtual void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) = 0; - - virtual ~IAstNodeVisitor() = default; -}; - -enum class EReturnType { - Any = 0, - Bool = 1, -}; - -class TAstNode : public TSimpleRefCount<TAstNode> { -public: - explicit TAstNode(TPosition pos); - - TPosition GetPos() const; - - virtual void Accept(IAstNodeVisitor& visitor) const = 0; - - virtual EReturnType GetReturnType() const; - - virtual ~TAstNode() = default; - -private: - TPosition Pos; -}; - -using TAstNodePtr = TIntrusivePtr<TAstNode>; - -class TRootNode : public TAstNode { -public: - TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode); - - const TAstNodePtr GetExpr() const; - - EJsonPathMode GetMode() const; - - void Accept(IAstNodeVisitor& visitor) const override; - - EReturnType GetReturnType() const override; - -private: - TAstNodePtr Expr; - EJsonPathMode Mode; -}; - -class TContextObjectNode : public TAstNode { -public: - explicit TContextObjectNode(TPosition pos); - - void Accept(IAstNodeVisitor& visitor) const override; -}; - -class TVariableNode : public TAstNode { -public: - TVariableNode(TPosition pos, const TString& name); - - const TString& GetName() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TString Name; -}; - -class TLastArrayIndexNode : public TAstNode { -public: - 
explicit TLastArrayIndexNode(TPosition pos); - - void Accept(IAstNodeVisitor& visitor) const override; -}; - -class TNumberLiteralNode : public TAstNode { -public: - TNumberLiteralNode(TPosition pos, double value); - - double GetValue() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - double Value; -}; - -class TMemberAccessNode : public TAstNode { -public: - TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input); - - const TStringBuf GetMember() const; - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TString Member; - TAstNodePtr Input; -}; - -class TWildcardMemberAccessNode : public TAstNode { -public: - TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input); - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; -}; - -class TArrayAccessNode : public TAstNode { -public: - struct TSubscript { - TAstNodePtr From; - TAstNodePtr To; - }; - - TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input); - - const TVector<TSubscript>& GetSubscripts() const; - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TVector<TSubscript> Subscripts; - TAstNodePtr Input; -}; - -class TWildcardArrayAccessNode : public TAstNode { -public: - TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input); - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; -}; - -enum class EUnaryOperation { - Plus = 0, - Minus = 1, - Not = 2, -}; - -class TUnaryOperationNode : public TAstNode { -public: - TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr); - - EUnaryOperation GetOp() const; - - const TAstNodePtr GetExpr() const; - - void Accept(IAstNodeVisitor& visitor) const override; - - EReturnType GetReturnType() const override; 
- -private: - EUnaryOperation Operation; - TAstNodePtr Expr; -}; - -enum class EBinaryOperation { - Add = 0, - Substract = 1, - Multiply = 2, - Divide = 3, - Modulo = 4, - Less = 5, - LessEqual = 6, - Greater = 7, - GreaterEqual = 8, - Equal = 9, - NotEqual = 10, - And = 11, - Or = 12, -}; - -class TBinaryOperationNode : public TAstNode { -public: - TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr); - - EBinaryOperation GetOp() const; - - const TAstNodePtr GetLeftExpr() const; - - const TAstNodePtr GetRightExpr() const; - - void Accept(IAstNodeVisitor& visitor) const override; - - EReturnType GetReturnType() const override; - -private: - EBinaryOperation Operation; - TAstNodePtr LeftExpr; - TAstNodePtr RightExpr; -}; - -class TBooleanLiteralNode : public TAstNode { -public: - TBooleanLiteralNode(TPosition pos, bool value); - - bool GetValue() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - bool Value; -}; - -class TNullLiteralNode : public TAstNode { -public: - explicit TNullLiteralNode(TPosition pos); - - void Accept(IAstNodeVisitor& visitor) const override; -}; - -class TStringLiteralNode : public TAstNode { -public: - TStringLiteralNode(TPosition pos, const TString& value); - - const TString& GetValue() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TString Value; -}; - -class TFilterObjectNode : public TAstNode { -public: - explicit TFilterObjectNode(TPosition pos); - - void Accept(IAstNodeVisitor& visitor) const override; -}; - -class TFilterPredicateNode : public TAstNode { -public: - TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input); - - const TAstNodePtr GetPredicate() const; - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Predicate; - TAstNodePtr Input; -}; - -enum class EMethodType { - Abs = 0, - Floor = 1, - Ceiling = 2, - Double = 3, - 
Type = 4, - Size = 5, - KeyValue = 6, -}; - -class TMethodCallNode : public TAstNode { -public: - TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input); - - EMethodType GetType() const; - - const TAstNodePtr GetInput() const; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - EMethodType Type; - TAstNodePtr Input; -}; - -class TStartsWithPredicateNode : public TAstNode { -public: - TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix); - - const TAstNodePtr GetInput() const; - - const TAstNodePtr GetPrefix() const; - - EReturnType GetReturnType() const override; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; - TAstNodePtr Prefix; -}; - -class TExistsPredicateNode : public TAstNode { -public: - TExistsPredicateNode(TPosition pos, TAstNodePtr input); - - const TAstNodePtr GetInput() const; - - EReturnType GetReturnType() const override; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; -}; - -class TIsUnknownPredicateNode : public TAstNode { -public: - TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input); - - const TAstNodePtr GetInput() const; - - EReturnType GetReturnType() const override; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; -}; - -class TLikeRegexPredicateNode : public TAstNode { -public: - TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex); - - const TAstNodePtr GetInput() const; - - const NHyperscan::TDatabase& GetRegex() const; - - EReturnType GetReturnType() const override; - - void Accept(IAstNodeVisitor& visitor) const override; - -private: - TAstNodePtr Input; - NHyperscan::TDatabase Regex; -}; - -} +#include <library/cpp/regex/hyperscan/hyperscan.h> + +namespace NYql::NJsonPath { + +class TRootNode; +class TContextObjectNode; +class TVariableNode; +class TLastArrayIndexNode; +class TNumberLiteralNode; +class 
TAccessorExprNode; +class TMemberAccessNode; +class TWildcardMemberAccessNode; +class TArrayAccessNode; +class TWildcardArrayAccessNode; +class TUnaryOperationNode; +class TBinaryOperationNode; +class TBooleanLiteralNode; +class TNullLiteralNode; +class TStringLiteralNode; +class TFilterObjectNode; +class TFilterPredicateNode; +class TMethodCallNode; +class TStartsWithPredicateNode; +class TExistsPredicateNode; +class TIsUnknownPredicateNode; +class TLikeRegexPredicateNode; + +enum class EJsonPathMode { + Lax = 0, + Strict = 1, +}; + +class IAstNodeVisitor { +public: + virtual void VisitRoot(const TRootNode& node) = 0; + virtual void VisitContextObject(const TContextObjectNode& node) = 0; + virtual void VisitVariable(const TVariableNode& node) = 0; + virtual void VisitLastArrayIndex(const TLastArrayIndexNode& node) = 0; + virtual void VisitNumberLiteral(const TNumberLiteralNode& node) = 0; + virtual void VisitMemberAccess(const TMemberAccessNode& node) = 0; + virtual void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) = 0; + virtual void VisitArrayAccess(const TArrayAccessNode& node) = 0; + virtual void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) = 0; + virtual void VisitUnaryOperation(const TUnaryOperationNode& node) = 0; + virtual void VisitBinaryOperation(const TBinaryOperationNode& node) = 0; + virtual void VisitBooleanLiteral(const TBooleanLiteralNode& node) = 0; + virtual void VisitNullLiteral(const TNullLiteralNode& node) = 0; + virtual void VisitStringLiteral(const TStringLiteralNode& node) = 0; + virtual void VisitFilterObject(const TFilterObjectNode& node) = 0; + virtual void VisitFilterPredicate(const TFilterPredicateNode& node) = 0; + virtual void VisitMethodCall(const TMethodCallNode& node) = 0; + virtual void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) = 0; + virtual void VisitExistsPredicate(const TExistsPredicateNode& node) = 0; + virtual void VisitIsUnknownPredicate(const 
TIsUnknownPredicateNode& node) = 0; + virtual void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) = 0; + + virtual ~IAstNodeVisitor() = default; +}; + +enum class EReturnType { + Any = 0, + Bool = 1, +}; + +class TAstNode : public TSimpleRefCount<TAstNode> { +public: + explicit TAstNode(TPosition pos); + + TPosition GetPos() const; + + virtual void Accept(IAstNodeVisitor& visitor) const = 0; + + virtual EReturnType GetReturnType() const; + + virtual ~TAstNode() = default; + +private: + TPosition Pos; +}; + +using TAstNodePtr = TIntrusivePtr<TAstNode>; + +class TRootNode : public TAstNode { +public: + TRootNode(TPosition pos, TAstNodePtr expr, EJsonPathMode mode); + + const TAstNodePtr GetExpr() const; + + EJsonPathMode GetMode() const; + + void Accept(IAstNodeVisitor& visitor) const override; + + EReturnType GetReturnType() const override; + +private: + TAstNodePtr Expr; + EJsonPathMode Mode; +}; + +class TContextObjectNode : public TAstNode { +public: + explicit TContextObjectNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TVariableNode : public TAstNode { +public: + TVariableNode(TPosition pos, const TString& name); + + const TString& GetName() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TString Name; +}; + +class TLastArrayIndexNode : public TAstNode { +public: + explicit TLastArrayIndexNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TNumberLiteralNode : public TAstNode { +public: + TNumberLiteralNode(TPosition pos, double value); + + double GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + double Value; +}; + +class TMemberAccessNode : public TAstNode { +public: + TMemberAccessNode(TPosition pos, const TString& member, TAstNodePtr input); + + const TStringBuf GetMember() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + 
TString Member; + TAstNodePtr Input; +}; + +class TWildcardMemberAccessNode : public TAstNode { +public: + TWildcardMemberAccessNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +class TArrayAccessNode : public TAstNode { +public: + struct TSubscript { + TAstNodePtr From; + TAstNodePtr To; + }; + + TArrayAccessNode(TPosition pos, TVector<TSubscript> subscripts, TAstNodePtr input); + + const TVector<TSubscript>& GetSubscripts() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TVector<TSubscript> Subscripts; + TAstNodePtr Input; +}; + +class TWildcardArrayAccessNode : public TAstNode { +public: + TWildcardArrayAccessNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +enum class EUnaryOperation { + Plus = 0, + Minus = 1, + Not = 2, +}; + +class TUnaryOperationNode : public TAstNode { +public: + TUnaryOperationNode(TPosition pos, EUnaryOperation op, TAstNodePtr expr); + + EUnaryOperation GetOp() const; + + const TAstNodePtr GetExpr() const; + + void Accept(IAstNodeVisitor& visitor) const override; + + EReturnType GetReturnType() const override; + +private: + EUnaryOperation Operation; + TAstNodePtr Expr; +}; + +enum class EBinaryOperation { + Add = 0, + Substract = 1, + Multiply = 2, + Divide = 3, + Modulo = 4, + Less = 5, + LessEqual = 6, + Greater = 7, + GreaterEqual = 8, + Equal = 9, + NotEqual = 10, + And = 11, + Or = 12, +}; + +class TBinaryOperationNode : public TAstNode { +public: + TBinaryOperationNode(TPosition pos, EBinaryOperation op, TAstNodePtr leftExpr, TAstNodePtr rightExpr); + + EBinaryOperation GetOp() const; + + const TAstNodePtr GetLeftExpr() const; + + const TAstNodePtr GetRightExpr() const; + + void Accept(IAstNodeVisitor& visitor) 
const override; + + EReturnType GetReturnType() const override; + +private: + EBinaryOperation Operation; + TAstNodePtr LeftExpr; + TAstNodePtr RightExpr; +}; + +class TBooleanLiteralNode : public TAstNode { +public: + TBooleanLiteralNode(TPosition pos, bool value); + + bool GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + bool Value; +}; + +class TNullLiteralNode : public TAstNode { +public: + explicit TNullLiteralNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TStringLiteralNode : public TAstNode { +public: + TStringLiteralNode(TPosition pos, const TString& value); + + const TString& GetValue() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TString Value; +}; + +class TFilterObjectNode : public TAstNode { +public: + explicit TFilterObjectNode(TPosition pos); + + void Accept(IAstNodeVisitor& visitor) const override; +}; + +class TFilterPredicateNode : public TAstNode { +public: + TFilterPredicateNode(TPosition pos, TAstNodePtr predicate, TAstNodePtr input); + + const TAstNodePtr GetPredicate() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Predicate; + TAstNodePtr Input; +}; + +enum class EMethodType { + Abs = 0, + Floor = 1, + Ceiling = 2, + Double = 3, + Type = 4, + Size = 5, + KeyValue = 6, +}; + +class TMethodCallNode : public TAstNode { +public: + TMethodCallNode(TPosition pos, EMethodType type, TAstNodePtr input); + + EMethodType GetType() const; + + const TAstNodePtr GetInput() const; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + EMethodType Type; + TAstNodePtr Input; +}; + +class TStartsWithPredicateNode : public TAstNode { +public: + TStartsWithPredicateNode(TPosition pos, TAstNodePtr input, TAstNodePtr prefix); + + const TAstNodePtr GetInput() const; + + const TAstNodePtr GetPrefix() const; + + EReturnType GetReturnType() const 
override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; + TAstNodePtr Prefix; +}; + +class TExistsPredicateNode : public TAstNode { +public: + TExistsPredicateNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +class TIsUnknownPredicateNode : public TAstNode { +public: + TIsUnknownPredicateNode(TPosition pos, TAstNodePtr input); + + const TAstNodePtr GetInput() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; +}; + +class TLikeRegexPredicateNode : public TAstNode { +public: + TLikeRegexPredicateNode(TPosition pos, TAstNodePtr input, NHyperscan::TDatabase&& regex); + + const TAstNodePtr GetInput() const; + + const NHyperscan::TDatabase& GetRegex() const; + + EReturnType GetReturnType() const override; + + void Accept(IAstNodeVisitor& visitor) const override; + +private: + TAstNodePtr Input; + NHyperscan::TDatabase Regex; +}; + +} diff --git a/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp b/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp index 742a9e18b4..9ac8b0b979 100644 --- a/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp +++ b/ydb/library/yql/minikql/jsonpath/benchmark/main.cpp @@ -1,6 +1,6 @@ #include <ydb/library/yql/minikql/dom/json.h> #include <ydb/library/yql/minikql/jsonpath/jsonpath.h> - + #include <ydb/library/yql/minikql/computation/mkql_value_builder.h> #include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> #include <ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> @@ -8,75 +8,75 @@ #include <ydb/library/yql/minikql/mkql_function_registry.h> #include <ydb/library/yql/minikql/mkql_alloc.h> #include <ydb/library/yql/minikql/mkql_node.h> - + #include <library/cpp/json/json_value.h> #include 
<library/cpp/testing/benchmark/bench.h> - -#include <util/random/fast.h> - -using namespace NJson; - -using namespace NYql; -using namespace NYql::NDom; -using namespace NYql::NUdf; -using namespace NYql::NJsonPath; -using namespace NJson; -using namespace NKikimr::NMiniKQL; - -TString RandomString(ui32 min, ui32 max) { - static TReallyFastRng32 rand(0); - TString result; - const ui32 length = rand.Uniform(min, max + 1); - result.reserve(length); - for (ui32 i = 0; i < length; ++i) { - result.push_back(char(rand.Uniform('a', 'z' + 1))); - } - return result; -} - -TString RandomString(ui32 length) { - return RandomString(length, length); -} - -TString GenerateRandomJson() { - TJsonMap result; - TJsonMap id; - id.InsertValue("id", TJsonValue(RandomString(24))); - id.InsertValue("issueId", TJsonValue(RandomString(24))); - result.InsertValue("_id", std::move(id)); - result.InsertValue("@class", TJsonValue(RandomString(60))); - result.InsertValue("author", TJsonValue(RandomString(10))); - result.InsertValue("transitionId", TJsonValue(RandomString(24))); - TJsonArray comments; - for (ui32 i = 0; i < 30; i++) { - TJsonMap comment; - comment.InsertValue("id", TJsonValue(RandomString(24))); - comment.InsertValue("newText", TJsonValue(RandomString(150))); - comments.AppendValue(std::move(comment)); - } - TJsonMap changes; - changes.InsertValue("comment", std::move(comments)); - result.InsertValue("changes", std::move(changes)); - return result.GetStringRobust(); -} - -const size_t MAX_PARSE_ERRORS = 100; - -Y_CPU_BENCHMARK(JsonPath, iface) { - TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())); - TScopedAlloc Alloc; - TTypeEnvironment Env(Alloc); - TMemoryUsageInfo MemInfo("Memory"); - THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()); - TDefaultValueBuilder ValueBuilder(HolderFactory); - - const TString json = GenerateRandomJson(); - const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder); - - 
for (size_t i = 0; i < iface.Iterations(); i++) { - TIssues issues; - const auto jsonPath = ParseJsonPath("$.'_id'.issueId", issues, MAX_PARSE_ERRORS); - const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder); - Y_VERIFY(!result.IsError()); - } + +#include <util/random/fast.h> + +using namespace NJson; + +using namespace NYql; +using namespace NYql::NDom; +using namespace NYql::NUdf; +using namespace NYql::NJsonPath; +using namespace NJson; +using namespace NKikimr::NMiniKQL; + +TString RandomString(ui32 min, ui32 max) { + static TReallyFastRng32 rand(0); + TString result; + const ui32 length = rand.Uniform(min, max + 1); + result.reserve(length); + for (ui32 i = 0; i < length; ++i) { + result.push_back(char(rand.Uniform('a', 'z' + 1))); + } + return result; +} + +TString RandomString(ui32 length) { + return RandomString(length, length); +} + +TString GenerateRandomJson() { + TJsonMap result; + TJsonMap id; + id.InsertValue("id", TJsonValue(RandomString(24))); + id.InsertValue("issueId", TJsonValue(RandomString(24))); + result.InsertValue("_id", std::move(id)); + result.InsertValue("@class", TJsonValue(RandomString(60))); + result.InsertValue("author", TJsonValue(RandomString(10))); + result.InsertValue("transitionId", TJsonValue(RandomString(24))); + TJsonArray comments; + for (ui32 i = 0; i < 30; i++) { + TJsonMap comment; + comment.InsertValue("id", TJsonValue(RandomString(24))); + comment.InsertValue("newText", TJsonValue(RandomString(150))); + comments.AppendValue(std::move(comment)); + } + TJsonMap changes; + changes.InsertValue("comment", std::move(comments)); + result.InsertValue("changes", std::move(changes)); + return result.GetStringRobust(); +} + +const size_t MAX_PARSE_ERRORS = 100; + +Y_CPU_BENCHMARK(JsonPath, iface) { + TIntrusivePtr<IFunctionRegistry> FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())); + TScopedAlloc Alloc; + TTypeEnvironment Env(Alloc); + TMemoryUsageInfo MemInfo("Memory"); + 
THolderFactory HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()); + TDefaultValueBuilder ValueBuilder(HolderFactory); + + const TString json = GenerateRandomJson(); + const TUnboxedValue dom = TryParseJsonDom(json, &ValueBuilder); + + for (size_t i = 0; i < iface.Iterations(); i++) { + TIssues issues; + const auto jsonPath = ParseJsonPath("$.'_id'.issueId", issues, MAX_PARSE_ERRORS); + const auto result = ExecuteJsonPath(jsonPath, TValue(dom), TVariablesMap(), &ValueBuilder); + Y_VERIFY(!result.IsError()); + } } diff --git a/ydb/library/yql/minikql/jsonpath/benchmark/ya.make b/ydb/library/yql/minikql/jsonpath/benchmark/ya.make index 461937db13..fcdd289b55 100644 --- a/ydb/library/yql/minikql/jsonpath/benchmark/ya.make +++ b/ydb/library/yql/minikql/jsonpath/benchmark/ya.make @@ -1,11 +1,11 @@ Y_BENCHMARK() - + OWNER( g:kikimr g:yql ) - -PEERDIR( + +PEERDIR( library/cpp/json ydb/library/yql/minikql ydb/library/yql/minikql/computation @@ -14,12 +14,12 @@ PEERDIR( ydb/library/yql/minikql/jsonpath ydb/library/yql/public/issue ydb/library/yql/public/udf/service/exception_policy -) - +) + YQL_LAST_ABI_VERSION() -SRCS( - main.cpp -) - +SRCS( + main.cpp +) + END() diff --git a/ydb/library/yql/minikql/jsonpath/binary.cpp b/ydb/library/yql/minikql/jsonpath/binary.cpp index 1d159599b8..dcf6791408 100644 --- a/ydb/library/yql/minikql/jsonpath/binary.cpp +++ b/ydb/library/yql/minikql/jsonpath/binary.cpp @@ -1,605 +1,605 @@ -#include "binary.h" - +#include "binary.h" + #include <ydb/library/yql/utils/yql_panic.h> - -namespace NYql::NJsonPath { - -bool TArraySubscriptOffsets::IsRange() const { - return ToOffset > 0; -} - -const TStringBuf TJsonPathItem::GetString() const { - return std::get<TStringBuf>(Data); -} - -const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const { - return std::get<TVector<TArraySubscriptOffsets>>(Data); -} - -const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const { - return 
std::get<TBinaryOpArgumentsOffset>(Data); -} - -double TJsonPathItem::GetNumber() const { - return std::get<double>(Data); -} - -bool TJsonPathItem::GetBoolean() const { - return std::get<bool>(Data); -} - -TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const { - return std::get<TFilterPredicateOffset>(Data); -} - -TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const { - return std::get<TStartsWithPrefixOffset>(Data); -} - -const THyperscanRegex& TJsonPathItem::GetRegex() const { - return std::get<THyperscanRegex>(Data); -} - -TJsonPathReader::TJsonPathReader(const TJsonPathPtr path) - : Path(path) - , InitialPos(0) - , Mode(ReadMode(InitialPos)) -{ -} - -const TJsonPathItem& TJsonPathReader::ReadFirst() { - return ReadFromPos(InitialPos); -} - -const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) { - YQL_ENSURE(item.InputItemOffset.Defined()); - return ReadFromPos(*item.InputItemOffset); -} - -const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) { - return ReadFromPos(subscript.FromOffset); -} - -const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) { - YQL_ENSURE(subscript.IsRange()); - return ReadFromPos(subscript.ToOffset); -} - -const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) { - return ReadFromPos(node.GetBinaryOpArguments().LeftOffset); -} - -const TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) { - return ReadFromPos(node.GetBinaryOpArguments().RightOffset); -} - -const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const TJsonPathItem& node) { - return ReadFromPos(node.GetFilterPredicateOffset().Offset); -} - -const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) { - return ReadFromPos(node.GetStartsWithPrefixOffset().Offset); -} - -EJsonPathMode TJsonPathReader::GetMode() const { - return Mode; -} - -const TJsonPathItem& 
TJsonPathReader::ReadFromPos(TUint pos) { - YQL_ENSURE(pos < Path->Size()); - - const auto it = ItemCache.find(pos); - if (it != ItemCache.end()) { - return it->second; - } - - TJsonPathItem& result = ItemCache[pos]; - result.Type = ReadType(pos); - - const auto row = ReadUint(pos); - const auto column = ReadUint(pos); - result.Pos = TPosition(column, row, "jsonpath"); - - switch (result.Type) { - // Items without input - case EJsonPathItemType::FilterObject: - case EJsonPathItemType::NullLiteral: - case EJsonPathItemType::ContextObject: - case EJsonPathItemType::LastArrayIndex: - break; - - case EJsonPathItemType::Variable: - case EJsonPathItemType::StringLiteral: - result.Data = ReadString(pos); - break; - - case EJsonPathItemType::NumberLiteral: - result.Data = ReadDouble(pos); - break; - - case EJsonPathItemType::BooleanLiteral: - result.Data = ReadBool(pos); - break; - - // Items with single input - case EJsonPathItemType::TypeMethod: - case EJsonPathItemType::SizeMethod: - case EJsonPathItemType::KeyValueMethod: - case EJsonPathItemType::AbsMethod: - case EJsonPathItemType::FloorMethod: - case EJsonPathItemType::CeilingMethod: - case EJsonPathItemType::DoubleMethod: - case EJsonPathItemType::WildcardArrayAccess: - case EJsonPathItemType::WildcardMemberAccess: - case EJsonPathItemType::UnaryMinus: - case EJsonPathItemType::UnaryPlus: - case EJsonPathItemType::UnaryNot: - case EJsonPathItemType::IsUnknownPredicate: - case EJsonPathItemType::ExistsPredicate: - result.InputItemOffset = ReadUint(pos); - break; - - case EJsonPathItemType::MemberAccess: - result.Data = ReadString(pos); - result.InputItemOffset = ReadUint(pos); - break; - - case EJsonPathItemType::ArrayAccess: - result.Data = ReadSubscripts(pos); - result.InputItemOffset = ReadUint(pos); - break; - - case EJsonPathItemType::FilterPredicate: - result.Data = TFilterPredicateOffset{ReadUint(pos)}; - result.InputItemOffset = ReadUint(pos); - break; - - case EJsonPathItemType::StartsWithPredicate: - 
result.Data = TStartsWithPrefixOffset{ReadUint(pos)}; - result.InputItemOffset = ReadUint(pos); - break; - - case EJsonPathItemType::LikeRegexPredicate: { - const auto serializedRegex = ReadString(pos); - THyperscanRegex regex; - regex.Regex = NHyperscan::Deserialize(serializedRegex); - regex.Scratch = NHyperscan::MakeScratch(regex.Regex); - result.Data = std::move(regex); - result.InputItemOffset = ReadUint(pos); - break; - } - - // Items with 2 inputs - case EJsonPathItemType::BinaryAdd: - case EJsonPathItemType::BinarySubstract: - case EJsonPathItemType::BinaryMultiply: - case EJsonPathItemType::BinaryDivide: - case EJsonPathItemType::BinaryModulo: - case EJsonPathItemType::BinaryLess: - case EJsonPathItemType::BinaryLessEqual: - case EJsonPathItemType::BinaryGreater: - case EJsonPathItemType::BinaryGreaterEqual: - case EJsonPathItemType::BinaryEqual: - case EJsonPathItemType::BinaryNotEqual: - case EJsonPathItemType::BinaryAnd: - case EJsonPathItemType::BinaryOr: - TBinaryOpArgumentsOffset data; - data.LeftOffset = ReadUint(pos); - data.RightOffset = ReadUint(pos); - result.Data = data; - break; - } - - return result; -} - -TUint TJsonPathReader::ReadUint(TUint& pos) { - return ReadPOD<TUint>(pos); -} - -double TJsonPathReader::ReadDouble(TUint& pos) { - return ReadPOD<double>(pos); -} - -bool TJsonPathReader::ReadBool(TUint& pos) { - return ReadPOD<bool>(pos); -} - -EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) { - return static_cast<EJsonPathItemType>(ReadUint(pos)); -} - -EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) { - return static_cast<EJsonPathMode>(ReadUint(pos)); -} - -const TStringBuf TJsonPathReader::ReadString(TUint& pos) { - TUint length = ReadUint(pos); - TStringBuf result(Path->Begin() + pos, length); - pos += length; - return result; -} - -TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) { - const auto count = ReadUint(pos); - TVector<TArraySubscriptOffsets> result(count); - - for (size_t i = 0; i < 
count; i++) { - result[i].FromOffset = ReadUint(pos); - result[i].ToOffset = ReadUint(pos); - } - return result; -} - -void TJsonPathBuilder::VisitRoot(const TRootNode& node) { - // Block structure: - // <(1) TUint> - // Components: - // (1) Must be casted to EJsonPathMode. Jsonpath execution mode - WriteMode(node.GetMode()); - node.GetExpr()->Accept(*this); -} - -void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) { - WriteZeroInputItem(EJsonPathItemType::ContextObject, node); -} - -void TJsonPathBuilder::VisitVariable(const TVariableNode& node) { - WriteZeroInputItem(EJsonPathItemType::Variable, node); - WriteString(node.GetName()); -} - -void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) { - WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node); -} - -void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) { - WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node); - WriteDouble(node.GetValue()); -} - -void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> - // Components: - // (1) Must be casted to EJsonPathItemType. 
Member access item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - // (4) Length of member name string - // (5) Member name string - // (6) Offset of the input item - WriteType(EJsonPathItemType::MemberAccess); - WritePos(node); - WriteString(node.GetMember()); - - WriteNextPosition(); - node.GetInput()->Accept(*this); -} - -void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { - WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput()); -} - -void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items> - // Components: - // (1) Must be casted to EJsonPathItemType. Array access item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - // (4) Count of subscripts stored - // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element - // is set to it's offset and second is zero. If subscript is a range, both pair elements are valid offsets - // to the elements of range (lower and upper bound). - // (6) Offset of the input item - // (7) Array of subcsripts. For details about encoding see VisitArraySubscript - WriteType(EJsonPathItemType::ArrayAccess); - WritePos(node); - - // (4) Write count of subscripts stored - const auto& subscripts = node.GetSubscripts(); - const auto count = subscripts.size(); - WriteUint(count); - - // (5) We do not know sizes of each subscript. 
Write array of zeros for offsets - const auto indexStart = CurrentEndPos(); - TVector<TUint> offsets(2 * count); - WriteUintSequence(offsets); - - // (6) Reserve space for input offset to rewrite it later - const auto inputStart = CurrentEndPos(); - WriteFinishPosition(); - - // (7) Write all subscripts and record offset for each of them - for (size_t i = 0; i < count; i++) { - offsets[2 * i] = CurrentEndPos(); - subscripts[i].From->Accept(*this); - - if (subscripts[i].To) { - offsets[2 * i + 1] = CurrentEndPos(); - subscripts[i].To->Accept(*this); - } - } - - // (5) Rewrite offsets with correct values - RewriteUintSequence(offsets, indexStart); - - // (6) Rewrite input offset - RewriteUint(CurrentEndPos(), inputStart); - node.GetInput()->Accept(*this); -} - -void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { - WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput()); -} - -void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) { - EJsonPathItemType type; - switch (node.GetOp()) { - case EUnaryOperation::Plus: - type = EJsonPathItemType::UnaryPlus; - break; - case EUnaryOperation::Minus: - type = EJsonPathItemType::UnaryMinus; - break; - case EUnaryOperation::Not: - type = EJsonPathItemType::UnaryNot; - break; - } - - WriteSingleInputItem(type, node, node.GetExpr()); -} - -void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) { - EJsonPathItemType type; - switch (node.GetOp()) { - case EBinaryOperation::Add: - type = EJsonPathItemType::BinaryAdd; - break; - case EBinaryOperation::Substract: - type = EJsonPathItemType::BinarySubstract; - break; - case EBinaryOperation::Multiply: - type = EJsonPathItemType::BinaryMultiply; - break; - case EBinaryOperation::Divide: - type = EJsonPathItemType::BinaryDivide; - break; - case EBinaryOperation::Modulo: - type = EJsonPathItemType::BinaryModulo; - break; - case EBinaryOperation::Less: - type = 
EJsonPathItemType::BinaryLess; - break; - case EBinaryOperation::LessEqual: - type = EJsonPathItemType::BinaryLessEqual; - break; - case EBinaryOperation::Greater: - type = EJsonPathItemType::BinaryGreater; - break; - case EBinaryOperation::GreaterEqual: - type = EJsonPathItemType::BinaryGreaterEqual; - break; - case EBinaryOperation::Equal: - type = EJsonPathItemType::BinaryEqual; - break; - case EBinaryOperation::NotEqual: - type = EJsonPathItemType::BinaryNotEqual; - break; - case EBinaryOperation::And: - type = EJsonPathItemType::BinaryAnd; - break; - case EBinaryOperation::Or: - type = EJsonPathItemType::BinaryOr; - break; - } - - WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr()); -} - -void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) { - WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node); - WriteBool(node.GetValue()); -} - -void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) { - WriteZeroInputItem(EJsonPathItemType::NullLiteral, node); -} - -void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) { - WriteZeroInputItem(EJsonPathItemType::StringLiteral, node); - WriteString(node.GetValue()); -} - -void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) { - WriteZeroInputItem(EJsonPathItemType::FilterObject, node); -} - -void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) { - WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput()); -} - -void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) { - EJsonPathItemType type; - switch (node.GetType()) { - case EMethodType::Abs: - type = EJsonPathItemType::AbsMethod; - break; - case EMethodType::Floor: - type = EJsonPathItemType::FloorMethod; - break; - case EMethodType::Ceiling: - type = EJsonPathItemType::CeilingMethod; - break; - case EMethodType::Double: - type = EJsonPathItemType::DoubleMethod; - break; - case 
EMethodType::Type: - type = EJsonPathItemType::TypeMethod; - break; - case EMethodType::Size: - type = EJsonPathItemType::SizeMethod; - break; - case EMethodType::KeyValue: - type = EJsonPathItemType::KeyValueMethod; - break; - } - - WriteSingleInputItem(type, node, node.GetInput()); -} - -TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() { - Result->ShrinkToFit(); - return Result; -} - -void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { - WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput()); -} - -void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) { - WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput()); -} - -void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { - WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput()); -} - -void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> - // Components: - // (1) Must be casted to EJsonPathItemType. 
Member access item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - // (4) Length of serialized Hyperscan database - // (5) Serialized Hyperscan database - // (6) Offset of the input item - WriteType(EJsonPathItemType::LikeRegexPredicate); - WritePos(node); - - const TString serializedRegex = NHyperscan::Serialize(node.GetRegex()); - WriteString(serializedRegex); - - WriteNextPosition(); - node.GetInput()->Accept(*this); -} - -void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> - // Components: - // (1) Item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - WriteType(type); - WritePos(node); -} - -void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item> - // Components: - // (1) Item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - // (4) Offset of the input item - // (5) Input item - WriteZeroInputItem(type, node); - - WriteNextPosition(); - input->Accept(*this); -} - -void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) { - // Block structure: - // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item> - // Components: - // (1) Item type - // (2) Row of the position in the source jsonpath - // (3) Column of the position in the source jsonpath - // (4) Offset of the first input - // (5) Offset of the second input - // (6) JsonPath item representing first input - // (7) JsonPath item representing right input - WriteZeroInputItem(type, node); - - // (4) and (5) Fill offsets with zeros - const auto indexStart = 
CurrentEndPos(); - WriteUint(0); - WriteUint(0); - - // (6) Write first input and record it's offset - const auto firstInputStart = CurrentEndPos(); - firstInput->Accept(*this); - - // (7) Write second input and record it's offset - const auto secondInputStart = CurrentEndPos(); - secondInput->Accept(*this); - - // (4) and (5) Rewrite offsets with correct values - RewriteUintSequence({firstInputStart, secondInputStart}, indexStart); -} - -void TJsonPathBuilder::WritePos(const TAstNode& node) { - WriteUint(node.GetPos().Row); - WriteUint(node.GetPos().Column); -} - -void TJsonPathBuilder::WriteType(EJsonPathItemType type) { - WriteUint(static_cast<TUint>(type)); -} - -void TJsonPathBuilder::WriteMode(EJsonPathMode mode) { - WriteUint(static_cast<TUint>(mode)); -} - -void TJsonPathBuilder::WriteNextPosition() { - WriteUint(CurrentEndPos() + sizeof(TUint)); -} - -void TJsonPathBuilder::WriteFinishPosition() { - WriteUint(0); -} - -void TJsonPathBuilder::WriteString(TStringBuf value) { - WriteUint(value.size()); - Result->Append(value.Data(), value.size()); -} - -void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) { - const auto length = sequence.size() * sizeof(TUint); - Y_ASSERT(offset + length < CurrentEndPos()); - - MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length); -} - -void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) { - const auto length = sequence.size() * sizeof(TUint); - Result->Append(reinterpret_cast<const char*>(sequence.data()), length); -} - -void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) { - Y_ASSERT(offset + sizeof(TUint) < CurrentEndPos()); - - MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint)); -} - -void TJsonPathBuilder::WriteUint(TUint value) { - WritePOD(value); -} - -void TJsonPathBuilder::WriteDouble(double value) { - WritePOD(value); -} - -void TJsonPathBuilder::WriteBool(bool value) { - 
WritePOD(value); -} - -TUint TJsonPathBuilder::CurrentEndPos() const { - return Result->Size(); -} - - + +namespace NYql::NJsonPath { + +bool TArraySubscriptOffsets::IsRange() const { + return ToOffset > 0; +} + +const TStringBuf TJsonPathItem::GetString() const { + return std::get<TStringBuf>(Data); +} + +const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const { + return std::get<TVector<TArraySubscriptOffsets>>(Data); +} + +const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const { + return std::get<TBinaryOpArgumentsOffset>(Data); +} + +double TJsonPathItem::GetNumber() const { + return std::get<double>(Data); +} + +bool TJsonPathItem::GetBoolean() const { + return std::get<bool>(Data); +} + +TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const { + return std::get<TFilterPredicateOffset>(Data); +} + +TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const { + return std::get<TStartsWithPrefixOffset>(Data); +} + +const THyperscanRegex& TJsonPathItem::GetRegex() const { + return std::get<THyperscanRegex>(Data); +} + +TJsonPathReader::TJsonPathReader(const TJsonPathPtr path) + : Path(path) + , InitialPos(0) + , Mode(ReadMode(InitialPos)) +{ +} + +const TJsonPathItem& TJsonPathReader::ReadFirst() { + return ReadFromPos(InitialPos); +} + +const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) { + YQL_ENSURE(item.InputItemOffset.Defined()); + return ReadFromPos(*item.InputItemOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) { + return ReadFromPos(subscript.FromOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) { + YQL_ENSURE(subscript.IsRange()); + return ReadFromPos(subscript.ToOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) { + return ReadFromPos(node.GetBinaryOpArguments().LeftOffset); +} + +const 
TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) { + return ReadFromPos(node.GetBinaryOpArguments().RightOffset); +} + +const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const TJsonPathItem& node) { + return ReadFromPos(node.GetFilterPredicateOffset().Offset); +} + +const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) { + return ReadFromPos(node.GetStartsWithPrefixOffset().Offset); +} + +EJsonPathMode TJsonPathReader::GetMode() const { + return Mode; +} + +const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) { + YQL_ENSURE(pos < Path->Size()); + + const auto it = ItemCache.find(pos); + if (it != ItemCache.end()) { + return it->second; + } + + TJsonPathItem& result = ItemCache[pos]; + result.Type = ReadType(pos); + + const auto row = ReadUint(pos); + const auto column = ReadUint(pos); + result.Pos = TPosition(column, row, "jsonpath"); + + switch (result.Type) { + // Items without input + case EJsonPathItemType::FilterObject: + case EJsonPathItemType::NullLiteral: + case EJsonPathItemType::ContextObject: + case EJsonPathItemType::LastArrayIndex: + break; + + case EJsonPathItemType::Variable: + case EJsonPathItemType::StringLiteral: + result.Data = ReadString(pos); + break; + + case EJsonPathItemType::NumberLiteral: + result.Data = ReadDouble(pos); + break; + + case EJsonPathItemType::BooleanLiteral: + result.Data = ReadBool(pos); + break; + + // Items with single input + case EJsonPathItemType::TypeMethod: + case EJsonPathItemType::SizeMethod: + case EJsonPathItemType::KeyValueMethod: + case EJsonPathItemType::AbsMethod: + case EJsonPathItemType::FloorMethod: + case EJsonPathItemType::CeilingMethod: + case EJsonPathItemType::DoubleMethod: + case EJsonPathItemType::WildcardArrayAccess: + case EJsonPathItemType::WildcardMemberAccess: + case EJsonPathItemType::UnaryMinus: + case EJsonPathItemType::UnaryPlus: + case EJsonPathItemType::UnaryNot: + case EJsonPathItemType::IsUnknownPredicate: + case 
EJsonPathItemType::ExistsPredicate: + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::MemberAccess: + result.Data = ReadString(pos); + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::ArrayAccess: + result.Data = ReadSubscripts(pos); + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::FilterPredicate: + result.Data = TFilterPredicateOffset{ReadUint(pos)}; + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::StartsWithPredicate: + result.Data = TStartsWithPrefixOffset{ReadUint(pos)}; + result.InputItemOffset = ReadUint(pos); + break; + + case EJsonPathItemType::LikeRegexPredicate: { + const auto serializedRegex = ReadString(pos); + THyperscanRegex regex; + regex.Regex = NHyperscan::Deserialize(serializedRegex); + regex.Scratch = NHyperscan::MakeScratch(regex.Regex); + result.Data = std::move(regex); + result.InputItemOffset = ReadUint(pos); + break; + } + + // Items with 2 inputs + case EJsonPathItemType::BinaryAdd: + case EJsonPathItemType::BinarySubstract: + case EJsonPathItemType::BinaryMultiply: + case EJsonPathItemType::BinaryDivide: + case EJsonPathItemType::BinaryModulo: + case EJsonPathItemType::BinaryLess: + case EJsonPathItemType::BinaryLessEqual: + case EJsonPathItemType::BinaryGreater: + case EJsonPathItemType::BinaryGreaterEqual: + case EJsonPathItemType::BinaryEqual: + case EJsonPathItemType::BinaryNotEqual: + case EJsonPathItemType::BinaryAnd: + case EJsonPathItemType::BinaryOr: + TBinaryOpArgumentsOffset data; + data.LeftOffset = ReadUint(pos); + data.RightOffset = ReadUint(pos); + result.Data = data; + break; + } + + return result; +} + +TUint TJsonPathReader::ReadUint(TUint& pos) { + return ReadPOD<TUint>(pos); +} + +double TJsonPathReader::ReadDouble(TUint& pos) { + return ReadPOD<double>(pos); +} + +bool TJsonPathReader::ReadBool(TUint& pos) { + return ReadPOD<bool>(pos); +} + +EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) { + 
return static_cast<EJsonPathItemType>(ReadUint(pos)); +} + +EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) { + return static_cast<EJsonPathMode>(ReadUint(pos)); +} + +const TStringBuf TJsonPathReader::ReadString(TUint& pos) { + TUint length = ReadUint(pos); + TStringBuf result(Path->Begin() + pos, length); + pos += length; + return result; +} + +TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) { + const auto count = ReadUint(pos); + TVector<TArraySubscriptOffsets> result(count); + + for (size_t i = 0; i < count; i++) { + result[i].FromOffset = ReadUint(pos); + result[i].ToOffset = ReadUint(pos); + } + return result; +} + +void TJsonPathBuilder::VisitRoot(const TRootNode& node) { + // Block structure: + // <(1) TUint> + // Components: + // (1) Must be casted to EJsonPathMode. Jsonpath execution mode + WriteMode(node.GetMode()); + node.GetExpr()->Accept(*this); +} + +void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) { + WriteZeroInputItem(EJsonPathItemType::ContextObject, node); +} + +void TJsonPathBuilder::VisitVariable(const TVariableNode& node) { + WriteZeroInputItem(EJsonPathItemType::Variable, node); + WriteString(node.GetName()); +} + +void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) { + WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node); +} + +void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node); + WriteDouble(node.GetValue()); +} + +void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> + // Components: + // (1) Must be casted to EJsonPathItemType. 
Member access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Length of member name string + // (5) Member name string + // (6) Offset of the input item + WriteType(EJsonPathItemType::MemberAccess); + WritePos(node); + WriteString(node.GetMember()); + + WriteNextPosition(); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { + WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items> + // Components: + // (1) Must be casted to EJsonPathItemType. Array access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Count of subscripts stored + // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element + // is set to it's offset and second is zero. If subscript is a range, both pair elements are valid offsets + // to the elements of range (lower and upper bound). + // (6) Offset of the input item + // (7) Array of subcsripts. For details about encoding see VisitArraySubscript + WriteType(EJsonPathItemType::ArrayAccess); + WritePos(node); + + // (4) Write count of subscripts stored + const auto& subscripts = node.GetSubscripts(); + const auto count = subscripts.size(); + WriteUint(count); + + // (5) We do not know sizes of each subscript. 
Write array of zeros for offsets + const auto indexStart = CurrentEndPos(); + TVector<TUint> offsets(2 * count); + WriteUintSequence(offsets); + + // (6) Reserve space for input offset to rewrite it later + const auto inputStart = CurrentEndPos(); + WriteFinishPosition(); + + // (7) Write all subscripts and record offset for each of them + for (size_t i = 0; i < count; i++) { + offsets[2 * i] = CurrentEndPos(); + subscripts[i].From->Accept(*this); + + if (subscripts[i].To) { + offsets[2 * i + 1] = CurrentEndPos(); + subscripts[i].To->Accept(*this); + } + } + + // (5) Rewrite offsets with correct values + RewriteUintSequence(offsets, indexStart); + + // (6) Rewrite input offset + RewriteUint(CurrentEndPos(), inputStart); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { + WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) { + EJsonPathItemType type; + switch (node.GetOp()) { + case EUnaryOperation::Plus: + type = EJsonPathItemType::UnaryPlus; + break; + case EUnaryOperation::Minus: + type = EJsonPathItemType::UnaryMinus; + break; + case EUnaryOperation::Not: + type = EJsonPathItemType::UnaryNot; + break; + } + + WriteSingleInputItem(type, node, node.GetExpr()); +} + +void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) { + EJsonPathItemType type; + switch (node.GetOp()) { + case EBinaryOperation::Add: + type = EJsonPathItemType::BinaryAdd; + break; + case EBinaryOperation::Substract: + type = EJsonPathItemType::BinarySubstract; + break; + case EBinaryOperation::Multiply: + type = EJsonPathItemType::BinaryMultiply; + break; + case EBinaryOperation::Divide: + type = EJsonPathItemType::BinaryDivide; + break; + case EBinaryOperation::Modulo: + type = EJsonPathItemType::BinaryModulo; + break; + case EBinaryOperation::Less: + type = 
EJsonPathItemType::BinaryLess; + break; + case EBinaryOperation::LessEqual: + type = EJsonPathItemType::BinaryLessEqual; + break; + case EBinaryOperation::Greater: + type = EJsonPathItemType::BinaryGreater; + break; + case EBinaryOperation::GreaterEqual: + type = EJsonPathItemType::BinaryGreaterEqual; + break; + case EBinaryOperation::Equal: + type = EJsonPathItemType::BinaryEqual; + break; + case EBinaryOperation::NotEqual: + type = EJsonPathItemType::BinaryNotEqual; + break; + case EBinaryOperation::And: + type = EJsonPathItemType::BinaryAnd; + break; + case EBinaryOperation::Or: + type = EJsonPathItemType::BinaryOr; + break; + } + + WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr()); +} + +void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node); + WriteBool(node.GetValue()); +} + +void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::NullLiteral, node); +} + +void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) { + WriteZeroInputItem(EJsonPathItemType::StringLiteral, node); + WriteString(node.GetValue()); +} + +void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) { + WriteZeroInputItem(EJsonPathItemType::FilterObject, node); +} + +void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) { + WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput()); +} + +void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) { + EJsonPathItemType type; + switch (node.GetType()) { + case EMethodType::Abs: + type = EJsonPathItemType::AbsMethod; + break; + case EMethodType::Floor: + type = EJsonPathItemType::FloorMethod; + break; + case EMethodType::Ceiling: + type = EJsonPathItemType::CeilingMethod; + break; + case EMethodType::Double: + type = EJsonPathItemType::DoubleMethod; + break; + case 
EMethodType::Type: + type = EJsonPathItemType::TypeMethod; + break; + case EMethodType::Size: + type = EJsonPathItemType::SizeMethod; + break; + case EMethodType::KeyValue: + type = EJsonPathItemType::KeyValueMethod; + break; + } + + WriteSingleInputItem(type, node, node.GetInput()); +} + +TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() { + Result->ShrinkToFit(); + return Result; +} + +void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { + WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput()); +} + +void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) { + WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { + WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput()); +} + +void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint> + // Components: + // (1) Must be casted to EJsonPathItemType. 
Member access item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Length of serialized Hyperscan database + // (5) Serialized Hyperscan database + // (6) Offset of the input item + WriteType(EJsonPathItemType::LikeRegexPredicate); + WritePos(node); + + const TString serializedRegex = NHyperscan::Serialize(node.GetRegex()); + WriteString(serializedRegex); + + WriteNextPosition(); + node.GetInput()->Accept(*this); +} + +void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + WriteType(type); + WritePos(node); +} + +void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Offset of the input item + // (5) Input item + WriteZeroInputItem(type, node); + + WriteNextPosition(); + input->Accept(*this); +} + +void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) { + // Block structure: + // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item> + // Components: + // (1) Item type + // (2) Row of the position in the source jsonpath + // (3) Column of the position in the source jsonpath + // (4) Offset of the first input + // (5) Offset of the second input + // (6) JsonPath item representing first input + // (7) JsonPath item representing right input + WriteZeroInputItem(type, node); + + // (4) and (5) Fill offsets with zeros + const auto indexStart = 
CurrentEndPos(); + WriteUint(0); + WriteUint(0); + + // (6) Write first input and record it's offset + const auto firstInputStart = CurrentEndPos(); + firstInput->Accept(*this); + + // (7) Write second input and record it's offset + const auto secondInputStart = CurrentEndPos(); + secondInput->Accept(*this); + + // (4) and (5) Rewrite offsets with correct values + RewriteUintSequence({firstInputStart, secondInputStart}, indexStart); +} + +void TJsonPathBuilder::WritePos(const TAstNode& node) { + WriteUint(node.GetPos().Row); + WriteUint(node.GetPos().Column); +} + +void TJsonPathBuilder::WriteType(EJsonPathItemType type) { + WriteUint(static_cast<TUint>(type)); +} + +void TJsonPathBuilder::WriteMode(EJsonPathMode mode) { + WriteUint(static_cast<TUint>(mode)); +} + +void TJsonPathBuilder::WriteNextPosition() { + WriteUint(CurrentEndPos() + sizeof(TUint)); +} + +void TJsonPathBuilder::WriteFinishPosition() { + WriteUint(0); +} + +void TJsonPathBuilder::WriteString(TStringBuf value) { + WriteUint(value.size()); + Result->Append(value.Data(), value.size()); +} + +void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) { + const auto length = sequence.size() * sizeof(TUint); + Y_ASSERT(offset + length < CurrentEndPos()); + + MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length); +} + +void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) { + const auto length = sequence.size() * sizeof(TUint); + Result->Append(reinterpret_cast<const char*>(sequence.data()), length); +} + +void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) { + Y_ASSERT(offset + sizeof(TUint) < CurrentEndPos()); + + MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint)); +} + +void TJsonPathBuilder::WriteUint(TUint value) { + WritePOD(value); +} + +void TJsonPathBuilder::WriteDouble(double value) { + WritePOD(value); +} + +void TJsonPathBuilder::WriteBool(bool value) { + 
WritePOD(value); +} + +TUint TJsonPathBuilder::CurrentEndPos() const { + return Result->Size(); +} + + } diff --git a/ydb/library/yql/minikql/jsonpath/binary.h b/ydb/library/yql/minikql/jsonpath/binary.h index 29ae84c741..b1e12a0a24 100644 --- a/ydb/library/yql/minikql/jsonpath/binary.h +++ b/ydb/library/yql/minikql/jsonpath/binary.h @@ -1,280 +1,280 @@ -#pragma once - -#include "ast_nodes.h" - -#include <library/cpp/regex/hyperscan/hyperscan.h> - -#include <util/system/unaligned_mem.h> -#include <util/generic/buffer.h> -#include <util/generic/ptr.h> -#include <util/generic/maybe.h> -#include <util/generic/hash.h> - -#include <variant> -#include <type_traits> - -namespace NYql::NJsonPath { - -class TJsonPath : public TSimpleRefCount<TJsonPath>, public TBuffer { -}; - -using TJsonPathPtr = TIntrusivePtr<TJsonPath>; -using TUint = ui64; - -enum class EJsonPathItemType { - MemberAccess = 0, - WildcardMemberAccess = 1, - ArrayAccess = 2, - WildcardArrayAccess = 3, - ContextObject = 4, - NumberLiteral = 5, - LastArrayIndex = 6, - UnaryPlus = 7, - UnaryMinus = 8, - BinaryAdd = 9, - BinarySubstract = 10, - BinaryMultiply = 11, - BinaryDivide = 12, - BinaryModulo = 13, - Variable = 14, - BinaryLess = 15, - BinaryLessEqual = 16, - BinaryGreater = 17, - BinaryGreaterEqual = 18, - BinaryEqual = 19, - BinaryNotEqual = 20, - BinaryAnd = 21, - BinaryOr = 22, - UnaryNot = 23, - BooleanLiteral = 24, - NullLiteral = 25, - StringLiteral = 26, - FilterObject = 27, - FilterPredicate = 28, - AbsMethod = 29, - FloorMethod = 30, - CeilingMethod = 31, - DoubleMethod = 32, - TypeMethod = 33, - SizeMethod = 34, - KeyValueMethod = 35, - StartsWithPredicate = 36, - ExistsPredicate = 37, - IsUnknownPredicate = 38, - LikeRegexPredicate = 39, -}; - -struct TArraySubscriptOffsets { - TUint FromOffset = 0; - TUint ToOffset = 0; - - bool IsRange() const; -}; - -struct TBinaryOpArgumentsOffset { - TUint LeftOffset = 0; - TUint RightOffset = 0; -}; - -struct TFilterPredicateOffset { - TUint Offset = 
0; -}; - -struct TStartsWithPrefixOffset { - TUint Offset = 0; -}; - -struct THyperscanRegex { - NHyperscan::TDatabase Regex; - NHyperscan::TScratch Scratch; -}; - -struct TJsonPathItem { - // Position in the source jsonpath - TPosition Pos; - - // Type of item - EJsonPathItemType Type; - - // Offset in buffer pointing to the input item - TMaybe<TUint> InputItemOffset; - - // Data associated with this item. To determine which variant - // type was filled callee must examine Type field. - // WARNING: Some item types do not fill Data field at all! You must - // check item type before accesing this field. - std::variant< - TStringBuf, - TVector<TArraySubscriptOffsets>, - TBinaryOpArgumentsOffset, - TFilterPredicateOffset, - TStartsWithPrefixOffset, - THyperscanRegex, - double, - bool - > Data; - - const TStringBuf GetString() const; - const TVector<TArraySubscriptOffsets>& GetSubscripts() const; - const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const; - const THyperscanRegex& GetRegex() const; - double GetNumber() const; - bool GetBoolean() const; - TFilterPredicateOffset GetFilterPredicateOffset() const; - TStartsWithPrefixOffset GetStartsWithPrefixOffset() const; - - // Pointer to the binary representation of jsonpath. - // We do not use this directly but Data field can reference to it. - // For example if this item is a string then Data contains TStringBuf - // pointing to some part inside buffer. We must ensure that it is not - // destructed while this item is alive so we keep shared pointer to it. 
- const TJsonPathPtr JsonPath; -}; - -class TJsonPathBuilder : public IAstNodeVisitor { -public: - TJsonPathBuilder() - : Result(new TJsonPath()) - { - } - - void VisitRoot(const TRootNode& node) override; - - void VisitContextObject(const TContextObjectNode& node) override; - - void VisitVariable(const TVariableNode& node) override; - - void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; - - void VisitNumberLiteral(const TNumberLiteralNode& node) override; - - void VisitMemberAccess(const TMemberAccessNode& node) override; - - void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; - - void VisitArrayAccess(const TArrayAccessNode& node) override; - - void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; - - void VisitUnaryOperation(const TUnaryOperationNode& node) override; - - void VisitBinaryOperation(const TBinaryOperationNode& node) override; - - void VisitBooleanLiteral(const TBooleanLiteralNode& node) override; - - void VisitNullLiteral(const TNullLiteralNode& node) override; - - void VisitStringLiteral(const TStringLiteralNode& node) override; - - void VisitFilterObject(const TFilterObjectNode& node) override; - - void VisitFilterPredicate(const TFilterPredicateNode& node) override; - - void VisitMethodCall(const TMethodCallNode& node) override; - - void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override; - - void VisitExistsPredicate(const TExistsPredicateNode& node) override; - - void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override; - - void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override; - - TJsonPathPtr ShrinkAndGetResult(); - -private: - void WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node); - - void WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input); - - void WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const 
TAstNodePtr secondInput); - - void WritePos(const TAstNode& node); - - void WriteType(EJsonPathItemType type); - - void WriteMode(EJsonPathMode mode); - - void WriteNextPosition(); - - void WriteFinishPosition(); - - void WriteString(TStringBuf value); - - void RewriteUintSequence(const TVector<TUint>& sequence, TUint offset); - - void WriteUintSequence(const TVector<TUint>& sequence); - - void RewriteUint(TUint value, TUint offset); - - void WriteUint(TUint value); - - void WriteDouble(double value); - - void WriteBool(bool value); - - template <typename T> - void WritePOD(const T& value) { - static_assert(std::is_pod_v<T>, "Type must be POD"); - Result->Append(reinterpret_cast<const char*>(&value), sizeof(T)); - } - - TUint CurrentEndPos() const; - - TJsonPathPtr Result; -}; - -class TJsonPathReader { -public: - TJsonPathReader(const TJsonPathPtr path); - - const TJsonPathItem& ReadFirst(); - - const TJsonPathItem& ReadInput(const TJsonPathItem& node); - - const TJsonPathItem& ReadFromSubscript(const TArraySubscriptOffsets& subscript); - - const TJsonPathItem& ReadToSubscript(const TArraySubscriptOffsets& subscript); - - const TJsonPathItem& ReadLeftOperand(const TJsonPathItem& node); - - const TJsonPathItem& ReadRightOperand(const TJsonPathItem& node); - - const TJsonPathItem& ReadFilterPredicate(const TJsonPathItem& node); - - const TJsonPathItem& ReadPrefix(const TJsonPathItem& node); - - EJsonPathMode GetMode() const; - -private: - const TJsonPathItem& ReadFromPos(TUint pos); - - TUint ReadUint(TUint& pos); - - double ReadDouble(TUint& pos); - - bool ReadBool(TUint& pos); - - EJsonPathItemType ReadType(TUint& pos); - - EJsonPathMode ReadMode(TUint& pos); - - const TStringBuf ReadString(TUint& pos); - - TVector<TArraySubscriptOffsets> ReadSubscripts(TUint& pos); - - template <typename T> - T ReadPOD(TUint& pos) { - static_assert(std::is_pod_v<T>, "Type must be POD"); - T value = ReadUnaligned<T>(Path->Begin() + pos); - pos += sizeof(T); - return 
std::move(value); - } - - const TJsonPathPtr Path; - TUint InitialPos; - EJsonPathMode Mode; - THashMap<TUint, TJsonPathItem> ItemCache; -}; - -}
\ No newline at end of file +#pragma once + +#include "ast_nodes.h" + +#include <library/cpp/regex/hyperscan/hyperscan.h> + +#include <util/system/unaligned_mem.h> +#include <util/generic/buffer.h> +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> +#include <util/generic/hash.h> + +#include <variant> +#include <type_traits> + +namespace NYql::NJsonPath { + +class TJsonPath : public TSimpleRefCount<TJsonPath>, public TBuffer { +}; + +using TJsonPathPtr = TIntrusivePtr<TJsonPath>; +using TUint = ui64; + +enum class EJsonPathItemType { + MemberAccess = 0, + WildcardMemberAccess = 1, + ArrayAccess = 2, + WildcardArrayAccess = 3, + ContextObject = 4, + NumberLiteral = 5, + LastArrayIndex = 6, + UnaryPlus = 7, + UnaryMinus = 8, + BinaryAdd = 9, + BinarySubstract = 10, + BinaryMultiply = 11, + BinaryDivide = 12, + BinaryModulo = 13, + Variable = 14, + BinaryLess = 15, + BinaryLessEqual = 16, + BinaryGreater = 17, + BinaryGreaterEqual = 18, + BinaryEqual = 19, + BinaryNotEqual = 20, + BinaryAnd = 21, + BinaryOr = 22, + UnaryNot = 23, + BooleanLiteral = 24, + NullLiteral = 25, + StringLiteral = 26, + FilterObject = 27, + FilterPredicate = 28, + AbsMethod = 29, + FloorMethod = 30, + CeilingMethod = 31, + DoubleMethod = 32, + TypeMethod = 33, + SizeMethod = 34, + KeyValueMethod = 35, + StartsWithPredicate = 36, + ExistsPredicate = 37, + IsUnknownPredicate = 38, + LikeRegexPredicate = 39, +}; + +struct TArraySubscriptOffsets { + TUint FromOffset = 0; + TUint ToOffset = 0; + + bool IsRange() const; +}; + +struct TBinaryOpArgumentsOffset { + TUint LeftOffset = 0; + TUint RightOffset = 0; +}; + +struct TFilterPredicateOffset { + TUint Offset = 0; +}; + +struct TStartsWithPrefixOffset { + TUint Offset = 0; +}; + +struct THyperscanRegex { + NHyperscan::TDatabase Regex; + NHyperscan::TScratch Scratch; +}; + +struct TJsonPathItem { + // Position in the source jsonpath + TPosition Pos; + + // Type of item + EJsonPathItemType Type; + + // Offset in buffer pointing to the 
input item + TMaybe<TUint> InputItemOffset; + + // Data associated with this item. To determine which variant + // type was filled callee must examine Type field. + // WARNING: Some item types do not fill Data field at all! You must + // check item type before accesing this field. + std::variant< + TStringBuf, + TVector<TArraySubscriptOffsets>, + TBinaryOpArgumentsOffset, + TFilterPredicateOffset, + TStartsWithPrefixOffset, + THyperscanRegex, + double, + bool + > Data; + + const TStringBuf GetString() const; + const TVector<TArraySubscriptOffsets>& GetSubscripts() const; + const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const; + const THyperscanRegex& GetRegex() const; + double GetNumber() const; + bool GetBoolean() const; + TFilterPredicateOffset GetFilterPredicateOffset() const; + TStartsWithPrefixOffset GetStartsWithPrefixOffset() const; + + // Pointer to the binary representation of jsonpath. + // We do not use this directly but Data field can reference to it. + // For example if this item is a string then Data contains TStringBuf + // pointing to some part inside buffer. We must ensure that it is not + // destructed while this item is alive so we keep shared pointer to it. 
+ const TJsonPathPtr JsonPath; +}; + +class TJsonPathBuilder : public IAstNodeVisitor { +public: + TJsonPathBuilder() + : Result(new TJsonPath()) + { + } + + void VisitRoot(const TRootNode& node) override; + + void VisitContextObject(const TContextObjectNode& node) override; + + void VisitVariable(const TVariableNode& node) override; + + void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; + + void VisitNumberLiteral(const TNumberLiteralNode& node) override; + + void VisitMemberAccess(const TMemberAccessNode& node) override; + + void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; + + void VisitArrayAccess(const TArrayAccessNode& node) override; + + void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; + + void VisitUnaryOperation(const TUnaryOperationNode& node) override; + + void VisitBinaryOperation(const TBinaryOperationNode& node) override; + + void VisitBooleanLiteral(const TBooleanLiteralNode& node) override; + + void VisitNullLiteral(const TNullLiteralNode& node) override; + + void VisitStringLiteral(const TStringLiteralNode& node) override; + + void VisitFilterObject(const TFilterObjectNode& node) override; + + void VisitFilterPredicate(const TFilterPredicateNode& node) override; + + void VisitMethodCall(const TMethodCallNode& node) override; + + void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override; + + void VisitExistsPredicate(const TExistsPredicateNode& node) override; + + void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override; + + void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override; + + TJsonPathPtr ShrinkAndGetResult(); + +private: + void WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node); + + void WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input); + + void WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const 
TAstNodePtr secondInput); + + void WritePos(const TAstNode& node); + + void WriteType(EJsonPathItemType type); + + void WriteMode(EJsonPathMode mode); + + void WriteNextPosition(); + + void WriteFinishPosition(); + + void WriteString(TStringBuf value); + + void RewriteUintSequence(const TVector<TUint>& sequence, TUint offset); + + void WriteUintSequence(const TVector<TUint>& sequence); + + void RewriteUint(TUint value, TUint offset); + + void WriteUint(TUint value); + + void WriteDouble(double value); + + void WriteBool(bool value); + + template <typename T> + void WritePOD(const T& value) { + static_assert(std::is_pod_v<T>, "Type must be POD"); + Result->Append(reinterpret_cast<const char*>(&value), sizeof(T)); + } + + TUint CurrentEndPos() const; + + TJsonPathPtr Result; +}; + +class TJsonPathReader { +public: + TJsonPathReader(const TJsonPathPtr path); + + const TJsonPathItem& ReadFirst(); + + const TJsonPathItem& ReadInput(const TJsonPathItem& node); + + const TJsonPathItem& ReadFromSubscript(const TArraySubscriptOffsets& subscript); + + const TJsonPathItem& ReadToSubscript(const TArraySubscriptOffsets& subscript); + + const TJsonPathItem& ReadLeftOperand(const TJsonPathItem& node); + + const TJsonPathItem& ReadRightOperand(const TJsonPathItem& node); + + const TJsonPathItem& ReadFilterPredicate(const TJsonPathItem& node); + + const TJsonPathItem& ReadPrefix(const TJsonPathItem& node); + + EJsonPathMode GetMode() const; + +private: + const TJsonPathItem& ReadFromPos(TUint pos); + + TUint ReadUint(TUint& pos); + + double ReadDouble(TUint& pos); + + bool ReadBool(TUint& pos); + + EJsonPathItemType ReadType(TUint& pos); + + EJsonPathMode ReadMode(TUint& pos); + + const TStringBuf ReadString(TUint& pos); + + TVector<TArraySubscriptOffsets> ReadSubscripts(TUint& pos); + + template <typename T> + T ReadPOD(TUint& pos) { + static_assert(std::is_pod_v<T>, "Type must be POD"); + T value = ReadUnaligned<T>(Path->Begin() + pos); + pos += sizeof(T); + return 
std::move(value); + } + + const TJsonPathPtr Path; + TUint InitialPos; + EJsonPathMode Mode; + THashMap<TUint, TJsonPathItem> ItemCache; +}; + +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/executor.cpp b/ydb/library/yql/minikql/jsonpath/executor.cpp index a559a38278..ac816c8479 100644 --- a/ydb/library/yql/minikql/jsonpath/executor.cpp +++ b/ydb/library/yql/minikql/jsonpath/executor.cpp @@ -1,1067 +1,1067 @@ -#include "executor.h" -#include "parse_double.h" - +#include "executor.h" +#include "parse_double.h" + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> #include <ydb/library/yql/minikql/dom/node.h> - -#include <library/cpp/regex/hyperscan/hyperscan.h> - -#include <util/generic/scope.h> -#include <util/generic/maybe.h> -#include <util/system/compiler.h> - -#include <cmath> - -namespace NYql::NJsonPath { - -using namespace NJson; -using namespace NUdf; -using namespace NDom; -using namespace NHyperscan; - -namespace { - -bool IsObjectOrArray(const TValue& value) { - return value.IsArray() || value.IsObject(); -} - -TIssue MakeError(TPosition pos, TIssueCode code, const TStringBuf message) { - TIssue error(pos, message); - error.SetCode(code, TSeverityIds::S_ERROR); - return error; -} - -TIssue MakeError(const TJsonPathItem& item, TIssueCode code, const TStringBuf message) { - return MakeError(item.Pos, code, message); -} - -} - -TResult::TResult(TJsonNodes&& nodes) - : Result(std::move(nodes)) -{ -} - -TResult::TResult(const TJsonNodes& nodes) - : Result(nodes) -{ -} - -TResult::TResult(TIssue&& issue) - : Result(std::move(issue)) -{ -} - -const TJsonNodes& TResult::GetNodes() const { - return std::get<TJsonNodes>(Result); -} - -TJsonNodes& TResult::GetNodes() { - return std::get<TJsonNodes>(Result); -} - -const TIssue& TResult::GetError() const { - return std::get<TIssue>(Result); -} - -bool TResult::IsError() const { - return std::holds_alternative<TIssue>(Result); -} - -TExecutor::TExecutor( - const TJsonPathPtr path, - const TJsonNodes& input, - const TVariablesMap& variables, - const IValueBuilder* valueBuilder) - : Reader(path) - , Input(input) - , 
Variables(variables) - , ValueBuilder(valueBuilder) -{ -} - -bool TExecutor::IsZero(double value) { - return -EPSILON <= value && value <= EPSILON; -} - -bool TExecutor::IsLess(double a, double b) { - return (b - a) > EPSILON; -} - -bool TExecutor::IsGreater(double a, double b) { - return (a - b) > EPSILON; -} - -bool TExecutor::IsEqual(double a, double b) { - return IsZero(a - b); -} - -bool TExecutor::IsStrict() const { - return Reader.GetMode() == EJsonPathMode::Strict; -} - -bool TExecutor::IsLax() const { - return Reader.GetMode() == EJsonPathMode::Lax; -} - -TResult TExecutor::Execute() { - return Execute(Reader.ReadFirst()); -} - -TResult TExecutor::Execute(const TJsonPathItem& item) { - switch (item.Type) { - case EJsonPathItemType::MemberAccess: - return MemberAccess(item); - case EJsonPathItemType::WildcardMemberAccess: - return WildcardMemberAccess(item); - case EJsonPathItemType::ContextObject: - return ContextObject(); - case EJsonPathItemType::Variable: - return Variable(item); - case EJsonPathItemType::NumberLiteral: - return NumberLiteral(item); - case EJsonPathItemType::ArrayAccess: - return ArrayAccess(item); - case EJsonPathItemType::WildcardArrayAccess: - return WildcardArrayAccess(item); - case EJsonPathItemType::LastArrayIndex: - return LastArrayIndex(item); - case EJsonPathItemType::UnaryMinus: - case EJsonPathItemType::UnaryPlus: - return UnaryArithmeticOp(item); - case EJsonPathItemType::BinaryAdd: - case EJsonPathItemType::BinarySubstract: - case EJsonPathItemType::BinaryMultiply: - case EJsonPathItemType::BinaryDivide: - case EJsonPathItemType::BinaryModulo: - return BinaryArithmeticOp(item); - case EJsonPathItemType::BinaryAnd: - case EJsonPathItemType::BinaryOr: - return BinaryLogicalOp(item); - case EJsonPathItemType::UnaryNot: - return UnaryLogicalOp(item); - case EJsonPathItemType::BooleanLiteral: - return BooleanLiteral(item); - case EJsonPathItemType::NullLiteral: - return NullLiteral(); - case EJsonPathItemType::StringLiteral: - 
return StringLiteral(item); - case EJsonPathItemType::FilterObject: - return FilterObject(item); - case EJsonPathItemType::FilterPredicate: - return FilterPredicate(item); - case EJsonPathItemType::BinaryLess: - case EJsonPathItemType::BinaryLessEqual: - case EJsonPathItemType::BinaryGreater: - case EJsonPathItemType::BinaryGreaterEqual: - case EJsonPathItemType::BinaryEqual: - case EJsonPathItemType::BinaryNotEqual: - return CompareOp(item); - case EJsonPathItemType::AbsMethod: - case EJsonPathItemType::FloorMethod: - case EJsonPathItemType::CeilingMethod: - return NumericMethod(item); - case EJsonPathItemType::DoubleMethod: - return DoubleMethod(item); - case EJsonPathItemType::TypeMethod: - return TypeMethod(item); - case EJsonPathItemType::SizeMethod: - return SizeMethod(item); - case EJsonPathItemType::KeyValueMethod: - return KeyValueMethod(item); - case EJsonPathItemType::StartsWithPredicate: - return StartsWithPredicate(item); - case EJsonPathItemType::IsUnknownPredicate: - return IsUnknownPredicate(item); - case EJsonPathItemType::ExistsPredicate: - return ExistsPredicate(item); - case EJsonPathItemType::LikeRegexPredicate: - return LikeRegexPredicate(item); - } -} - -TResult TExecutor::ContextObject() { - return Input; -} - -TResult TExecutor::Variable(const TJsonPathItem& item) { - const auto it = Variables.find(item.GetString()); - if (it == Variables.end()) { - return MakeError(item, TIssuesIds::JSONPATH_UNDEFINED_VARIABLE, TStringBuilder() << "Undefined variable '" << item.GetString() << "'"); - } - - return TJsonNodes({it->second}); -} - -TResult TExecutor::LastArrayIndex(const TJsonPathItem& item) { - if (ArraySubscriptSource.empty()) { - return MakeError(item, TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT, "'last' is only allowed inside array subscripts"); - } - - const auto& array = ArraySubscriptSource.top(); - const i64 arraySize = array.GetSize(); - - // NOTE: For empty arrays `last` equals `-1`. 
This is intended, PostgreSQL 12 has the same behaviour - return TJsonNodes({TValue(MakeDouble(static_cast<double>(arraySize - 1)))}); -} - -TResult TExecutor::NumberLiteral(const TJsonPathItem& item) { - return TJsonNodes({TValue(MakeDouble(item.GetNumber()))}); -} - -TResult TExecutor::MemberAccess(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (!node.IsObject()) { - if (IsStrict()) { - return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); - } else { - continue; - } - } - - if (const auto payload = node.Lookup(item.GetString())) { - result.push_back(*payload); - continue; - } - - if (IsStrict()) { - return MakeError(item, TIssuesIds::JSONPATH_MEMBER_NOT_FOUND, "Member not found"); - } - } - - return std::move(result); -} - -TResult TExecutor::WildcardMemberAccess(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (!node.IsObject()) { - if (IsStrict()) { - return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); - } else { - continue; - } - } - - TValue key; - TValue value; - auto it = node.GetObjectIterator(); - while (it.Next(key, value)) { - result.push_back(value); - } - } - - return std::move(result); -} - -TMaybe<TIssue> TExecutor::EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result) { - if (index.size() != 1) { - return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Expected single number item for array index"); - } - - const auto& indexValue = index[0]; - if (!indexValue.IsNumber()) { - return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Array index must be number"); - } - - result = 
static_cast<i64>(std::floor(indexValue.GetNumber())); - return Nothing(); -} - -TMaybe<TIssue> TExecutor::EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result) { - for (const auto& subscript : item.GetSubscripts()) { - const auto& fromItem = Reader.ReadFromSubscript(subscript); - const auto fromResult = Execute(fromItem); - if (fromResult.IsError()) { - return fromResult.GetError(); - } - - i64 fromIndex = 0; - TMaybe<TIssue> error = EnsureSingleSubscript(fromItem.Pos, fromResult.GetNodes(), fromIndex); - if (error) { - return error; - } - - if (!subscript.IsRange()) { - result.emplace_back(fromIndex, fromItem.Pos); - continue; - } - - const auto& toItem = Reader.ReadToSubscript(subscript); - const auto toResult = Execute(toItem); - if (toResult.IsError()) { - return toResult.GetError(); - } - - i64 toIndex = 0; - error = EnsureSingleSubscript(toItem.Pos, toResult.GetNodes(), toIndex); - if (error) { - return error; - } - - result.emplace_back(fromIndex, fromItem.Pos, toIndex, toItem.Pos); - } - return Nothing(); -} - -TResult TExecutor::ArrayAccess(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { - if (!node.IsArray()) { - return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); - } - - ArraySubscriptSource.push(node); - Y_DEFER { - ArraySubscriptSource.pop(); - }; - - // Check for "hard" errors in array subscripts. 
These are forbidden even in lax mode - // NOTE: We intentionally execute subscripts expressions for each array in the input - // because they can contain `last` keyword which value is different for each array - TVector<TArraySubscript> subscripts; - TMaybe<TIssue> error = EnsureArraySubscripts(item, subscripts); - if (error) { - return std::move(*error); - } - - const ui64 arraySize = node.GetSize(); - for (const auto& idx : subscripts) { - // Check bounds for first subscript - if (idx.GetFrom() < 0 || idx.GetFrom() >= static_cast<i64>(arraySize)) { - if (IsStrict()) { - return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); - } else { - continue; - } - } - - // If there is no second subcripts, just return corresponding array element - if (!idx.IsRange()) { - result.push_back(node.GetElement(idx.GetFrom())); - continue; - } - - // Check bounds for second subscript - if (idx.GetTo() < 0 || idx.GetTo() >= static_cast<i64>(arraySize)) { - if (IsStrict()) { - return MakeError(idx.GetToPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); - } else { - continue; - } - } - - // In strict mode invalid ranges are forbidden - if (idx.GetFrom() > idx.GetTo() && IsStrict()) { - return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE, "Range lower bound is greater than upper bound"); - } - - for (i64 i = idx.GetFrom(); i <= idx.GetTo(); i++) { - result.push_back(node.GetElement(i)); - } - } - } - return std::move(result); -} - -TResult TExecutor::WildcardArrayAccess(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { - if (!node.IsArray()) { - return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); - } - - auto it = node.GetArrayIterator(); - TValue value; - while 
(it.Next(value)) { - result.push_back(value); - } - } - return std::move(result); -} - -TResult TExecutor::UnaryArithmeticOp(const TJsonPathItem& item) { - const auto& operandItem = Reader.ReadInput(item); - const auto operandsResult = Execute(operandItem); - if (operandsResult.IsError()) { - return operandsResult; - } - - const auto& operands = operandsResult.GetNodes(); - TJsonNodes result; - result.reserve(operands.size()); - for (const auto& operand : operands) { - if (!operand.IsNumber()) { - return MakeError( - operandItem, TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE, - TStringBuilder() << "Unsupported type for unary operations" - ); - } - - if (item.Type == EJsonPathItemType::UnaryPlus) { - result.push_back(operand); - continue; - } - - const auto value = operand.GetNumber(); - result.push_back(TValue(MakeDouble(-value))); - } - - return std::move(result); -} - -TMaybe<TIssue> TExecutor::EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result) { - if (nodes.size() != 1) { - return MakeError(pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for binary operation"); - } - - const auto& value = nodes[0]; - if (!value.IsNumber()) { - return MakeError( - pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE, - TStringBuilder() << "Unsupported type for binary operations" - ); - } - - result = value.GetNumber(); - return Nothing(); -} - -TResult TExecutor::BinaryArithmeticOp(const TJsonPathItem& item) { - const auto& leftItem = Reader.ReadLeftOperand(item); - const auto leftResult = Execute(leftItem); - if (leftResult.IsError()) { - return leftResult; - } - - double left = 0; - TMaybe<TIssue> error = EnsureBinaryArithmeticOpArgument(leftItem.Pos, leftResult.GetNodes(), left); - if (error) { - return std::move(*error); - } - - const auto& rightItem = Reader.ReadRightOperand(item); - const auto rightResult = Execute(rightItem); - if (rightResult.IsError()) { - 
return rightResult; - } - - double right = 0; - error = EnsureBinaryArithmeticOpArgument(rightItem.Pos, rightResult.GetNodes(), right); - if (error) { - return std::move(*error); - } - - double result = 0; - switch (item.Type) { - case EJsonPathItemType::BinaryAdd: - result = left + right; - break; - case EJsonPathItemType::BinarySubstract: - result = left - right; - break; - case EJsonPathItemType::BinaryMultiply: - result = left * right; - break; - case EJsonPathItemType::BinaryDivide: - if (IsZero(right)) { - return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); - } - result = left / right; - break; - case EJsonPathItemType::BinaryModulo: - if (IsZero(right)) { - return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); - } - result = std::fmod(left, right); - break; - default: - YQL_ENSURE(false, "Expected binary arithmetic operation"); - } - - if (Y_UNLIKELY(std::isinf(result))) { - return MakeError(item, TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY, "Binary operation result is infinity"); - } - - return TJsonNodes({TValue(MakeDouble(result))}); -} - -TMaybe<TIssue> TExecutor::EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result) { - if (nodes.size() != 1) { - return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for logical operation"); - } - - const auto& value = nodes[0]; - if (value.IsNull()) { - result = Nothing(); - } else if (value.IsBool()) { - result = value.GetBool(); - } else { - return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Unsupported type for logical operation"); - } - - return Nothing(); -} - -TResult TExecutor::BinaryLogicalOp(const TJsonPathItem& item) { - const auto& leftItem = Reader.ReadLeftOperand(item); - const auto leftResult = Execute(leftItem); - if (leftResult.IsError()) { - return leftResult; - } - - TMaybe<bool> left; - 
TMaybe<TIssue> error = EnsureLogicalOpArgument(leftItem.Pos, leftResult.GetNodes(), left); - if (error) { - return std::move(*error); - } - - const auto& rightItem = Reader.ReadRightOperand(item); - const auto rightResult = Execute(rightItem); - if (rightResult.IsError()) { - return rightResult; - } - - TMaybe<bool> right; - error = EnsureLogicalOpArgument(rightItem.Pos, rightResult.GetNodes(), right); - if (error) { - return std::move(*error); - } - - switch (item.Type) { - case EJsonPathItemType::BinaryAnd: { - /* - AND truth table (taken from SQL JSON standard) - - | && | true | false | null | - | ----- | ----- | ----- | ----- | - | true | true | false | null | - | false | false | false | false | - | null | null | false | null | - */ - if (left.Defined() && right.Defined()) { - return TJsonNodes({TValue(MakeBool(*left && *right))}); - } - - const bool falseVsNull = !left.GetOrElse(true) && !right.Defined(); - const bool nullVsFalse = !right.GetOrElse(true) && !left.Defined(); - if (falseVsNull || nullVsFalse) { - return TJsonNodes({TValue(MakeBool(false))}); - } - return TJsonNodes({TValue(MakeEntity())}); - } - case EJsonPathItemType::BinaryOr: { - /* - OR truth table (taken from SQL JSON standard) - - | || | true | false | null | - | ----- | ----- | ----- | ----- | - | true | true | true | true | - | false | true | false | null | - | null | true | null | null | - */ - if (left.Defined() && right.Defined()) { - return TJsonNodes({TValue(MakeBool(*left || *right))}); - } - - const bool trueVsNull = left.GetOrElse(false) && !right.Defined(); - const bool nullVsTrue = right.GetOrElse(false) && !left.Defined(); - if (trueVsNull || nullVsTrue) { - return TJsonNodes({TValue(MakeBool(true))}); - } - return TJsonNodes({TValue(MakeEntity())}); - } - default: - YQL_ENSURE(false, "Expected binary logical operation"); - } -} - -TResult TExecutor::UnaryLogicalOp(const TJsonPathItem& item) { - /* - NOT truth table (taken from SQL JSON standard) - - | x | !x | - | ----- | 
----- | - | true | false | - | false | true | - | null | null | - */ - const auto& operandItem = Reader.ReadInput(item); - const auto operandResult = Execute(operandItem); - if (operandResult.IsError()) { - return operandResult; - } - - TMaybe<bool> operand; - TMaybe<TIssue> error = EnsureLogicalOpArgument(operandItem.Pos, operandResult.GetNodes(), operand); - if (error) { - return std::move(*error); - } - - if (!operand.Defined()) { - return TJsonNodes({TValue(MakeEntity())}); - } - - return TJsonNodes({TValue(MakeBool(!(*operand)))}); -} - -TResult TExecutor::BooleanLiteral(const TJsonPathItem& item) { - return TJsonNodes({TValue(MakeBool(item.GetBoolean()))}); -} - -TResult TExecutor::NullLiteral() { - return TJsonNodes({TValue(MakeEntity())}); -} - -TResult TExecutor::StringLiteral(const TJsonPathItem& item) { - return TJsonNodes({TValue(MakeString(item.GetString(), ValueBuilder))}); -} - -TMaybe<bool> TExecutor::CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation) { - if (IsObjectOrArray(left) || IsObjectOrArray(right)) { - // Comparisons of objects and arrays are prohibited - return Nothing(); - } - - if (left.IsNull() && right.IsNull()) { - // null == null is true, but all other comparisons are false - return operation == EJsonPathItemType::BinaryEqual; - } - - if (left.IsNull() || right.IsNull()) { - // All operations between null and non-null are false - return false; - } - - auto doCompare = [&operation](const auto& left, const auto& right) { - switch (operation) { - case EJsonPathItemType::BinaryEqual: - return left == right; - case EJsonPathItemType::BinaryNotEqual: - return left != right; - case EJsonPathItemType::BinaryLess: - return left < right; - case EJsonPathItemType::BinaryLessEqual: - return left <= right; - case EJsonPathItemType::BinaryGreater: - return left > right; - case EJsonPathItemType::BinaryGreaterEqual: - return left >= right; - default: - YQL_ENSURE(false, "Expected compare operation"); - } - }; - - if 
(left.IsBool() && right.IsBool()) { - return doCompare(left.GetBool(), right.GetBool()); - } else if (left.IsString() && right.IsString()) { - // NOTE: Strings are compared as byte arrays. - // YQL does the same thing for UTF-8 strings and according to SQL/JSON - // standard JsonPath must use the same semantics. - // - // However this is not correct in logical meaning. Let us consider strings: - // - U+00e9 (LATIN SMALL LETTER E WITH ACUTE), 'é' - // - U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), `é` - // Even though these two strings are different byte sequences, they are identical - // from UTF-8 perspective. - return doCompare(left.GetString(), right.GetString()); - } - - if (!left.IsNumber() || !right.IsNumber()) { - return Nothing(); - } - - const auto leftNumber = left.GetNumber(); - const auto rightNumber = right.GetNumber(); - switch (operation) { - case EJsonPathItemType::BinaryEqual: - return IsEqual(leftNumber, rightNumber); - case EJsonPathItemType::BinaryNotEqual: - return !IsEqual(leftNumber, rightNumber); - case EJsonPathItemType::BinaryLess: - return IsLess(leftNumber, rightNumber); - case EJsonPathItemType::BinaryLessEqual: - return !IsGreater(leftNumber, rightNumber); - case EJsonPathItemType::BinaryGreater: - return IsGreater(leftNumber, rightNumber); - case EJsonPathItemType::BinaryGreaterEqual: - return !IsLess(leftNumber, rightNumber); - default: - YQL_ENSURE(false, "Expected compare operation"); - } -} - -TResult TExecutor::CompareOp(const TJsonPathItem& item) { - const auto& leftItem = Reader.ReadLeftOperand(item); - const auto leftResult = Execute(leftItem); - if (leftResult.IsError()) { - return TJsonNodes({TValue(MakeEntity())}); - } - - const auto& rightItem = Reader.ReadRightOperand(item); - const auto rightResult = Execute(rightItem); - if (rightResult.IsError()) { - return TJsonNodes({TValue(MakeEntity())}); - } - - const auto leftNodes = OptionalUnwrapArrays(leftResult.GetNodes()); - const auto rightNodes = 
OptionalUnwrapArrays(rightResult.GetNodes()); - bool error = false; - bool found = false; - for (const auto& left : leftNodes) { - for (const auto& right : rightNodes) { - const auto result = CompareValues(left, right, item.Type); - if (!result.Defined()) { - error = true; - } else { - found |= *result; - } - - if (IsLax() && (error || found)) { - break; - } - } - - if (IsLax() && (error || found)) { - break; - } - } - - if (error) { - return TJsonNodes({TValue(MakeEntity())}); - } - return TJsonNodes({TValue(MakeBool(found))}); -} - -TResult TExecutor::FilterObject(const TJsonPathItem& item) { - if (CurrentFilterObject.empty()) { - return MakeError(item, TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER, "'@' is only allowed inside filters"); - } - - return TJsonNodes({CurrentFilterObject.top()}); -} - -TResult TExecutor::FilterPredicate(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - const auto& predicateItem = Reader.ReadFilterPredicate(item); - TJsonNodes result; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - CurrentFilterObject.push(node); - Y_DEFER { - CurrentFilterObject.pop(); - }; - - const auto predicateResult = Execute(predicateItem); - if (predicateResult.IsError()) { - continue; - } - - const auto& predicateNodes = predicateResult.GetNodes(); - if (predicateNodes.size() != 1) { - continue; - } - - const auto& value = predicateNodes[0]; - if (value.IsBool() && value.GetBool()) { - result.push_back(node); - continue; - } - } - return std::move(result); -} - -TResult TExecutor::NumericMethod(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (!node.IsNumber()) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT, "Unsupported type for numeric method"); - 
} - - double applied = node.GetNumber(); - switch (item.Type) { - case EJsonPathItemType::AbsMethod: - applied = std::fabs(applied); - break; - case EJsonPathItemType::FloorMethod: - applied = std::floor(applied); - break; - case EJsonPathItemType::CeilingMethod: - applied = std::ceil(applied); - break; - default: - YQL_ENSURE(false, "Expected numeric method"); - } - result.push_back(TValue(MakeDouble(applied))); - } - return std::move(result); -} - -TResult TExecutor::DoubleMethod(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (!node.IsString()) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, "Unsupported type for double() method"); - } - - const double parsed = ParseDouble(node.GetString()); - if (std::isnan(parsed)) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMBER_STRING, "Error parsing number from string"); - } - - if (std::isinf(parsed)) { - return MakeError(item, TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING, "Parsed number is infinity"); - } - - result.push_back(TValue(MakeDouble(parsed))); - } - return std::move(result); -} - -TResult TExecutor::TypeMethod(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : input.GetNodes()) { - TStringBuf type; - switch (node.GetType()) { - case EValueType::Null: - type = "null"; - break; - case EValueType::Bool: - type = "boolean"; - break; - case EValueType::Number: - type = "number"; - break; - case EValueType::String: - type = "string"; - break; - case EValueType::Array: - type = "array"; - break; - case EValueType::Object: - type = "object"; - break; - } - result.push_back(TValue(MakeString(type, ValueBuilder))); - } - return std::move(result); -} - -TResult 
TExecutor::SizeMethod(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - for (const auto& node : input.GetNodes()) { - ui64 size = 1; - if (node.IsArray()) { - size = node.GetSize(); - } - result.push_back(TValue(MakeDouble(static_cast<double>(size)))); - } - return std::move(result); -} - -TResult TExecutor::KeyValueMethod(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - TJsonNodes result; - TPair row[2]; - TPair& nameEntry = row[0]; - TPair& valueEntry = row[1]; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (!node.IsObject()) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT, "Unsupported type for keyvalue() method"); - } - - TValue key; - TValue value; - auto it = node.GetObjectIterator(); - while (it.Next(key, value)) { - nameEntry.first = MakeString("name", ValueBuilder); - nameEntry.second = key.ConvertToUnboxedValue(ValueBuilder); - - valueEntry.first = MakeString("value", ValueBuilder); - valueEntry.second = value.ConvertToUnboxedValue(ValueBuilder); - - result.push_back(TValue(MakeDict(row, 2))); - } - } - return std::move(result); -} - -TResult TExecutor::StartsWithPredicate(const TJsonPathItem& item) { - const auto& input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - - const auto& inputNodes = input.GetNodes(); - if (inputNodes.size() != 1) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Expected exactly 1 item as input argument for starts with predicate"); - } - - const auto& inputString = inputNodes[0]; - if (!inputString.IsString()) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Type of input argument for starts with predicate must be string"); - } - - const auto prefix = Execute(Reader.ReadPrefix(item)); - if 
(prefix.IsError()) { - return prefix; - } - - bool error = false; - bool found = false; - for (const auto& node : prefix.GetNodes()) { - if (node.IsString()) { - found |= inputString.GetString().StartsWith(node.GetString()); - } else { - error = true; - } - - if (IsLax() && (found || error)) { - break; - } - } - - if (error) { - return TJsonNodes({TValue(MakeEntity())}); - } - return TJsonNodes({TValue(MakeBool(found))}); -} - -TResult TExecutor::IsUnknownPredicate(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - - const auto& nodes = input.GetNodes(); - if (nodes.size() != 1) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "Expected exactly 1 item as an argument for is unknown predicate"); - } - - const auto& node = nodes[0]; - if (node.IsNull()) { - return TJsonNodes({TValue(MakeBool(true))}); - } - - if (!node.IsBool()) { - return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "is unknown predicate supports only bool and null types for its argument"); - } - return TJsonNodes({TValue(MakeBool(false))}); -} - -TResult TExecutor::ExistsPredicate(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return TJsonNodes({TValue(MakeEntity())}); - } - - const auto& nodes = input.GetNodes(); - return TJsonNodes({TValue(MakeBool(!nodes.empty()))}); -} - -TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) { - const auto input = Execute(Reader.ReadInput(item)); - if (input.IsError()) { - return input; - } - - const auto& regex = item.GetRegex(); - bool error = false; - bool found = false; - for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { - if (node.IsString()) { - found |= Matches(regex.Regex, regex.Scratch, node.GetString()); - } else { - error = true; - } - - if (IsLax() && (found || error)) { - break; - } - } - - if (error) { - return 
TJsonNodes({TValue(MakeEntity())}); - } - return TJsonNodes({TValue(MakeBool(found))}); -} - -TJsonNodes TExecutor::OptionalUnwrapArrays(const TJsonNodes& input) { - if (IsStrict()) { - return input; - } - - TJsonNodes result; - for (const auto& node : input) { - if (!node.IsArray()) { - result.push_back(node); - continue; - } - - auto it = node.GetArrayIterator(); - TValue value; - while (it.Next(value)) { - result.push_back(value); - } - } - return result; -} - -TJsonNodes TExecutor::OptionalArrayWrapNodes(const TJsonNodes& input) { - if (IsStrict()) { - return input; - } - - TJsonNodes result; - for (const auto& node : input) { - if (node.IsArray()) { - result.push_back(node); - continue; - } - - TUnboxedValue nodeCopy(node.ConvertToUnboxedValue(ValueBuilder)); - result.push_back(TValue(MakeList(&nodeCopy, 1, ValueBuilder))); - } - return result; -} - + +#include <library/cpp/regex/hyperscan/hyperscan.h> + +#include <util/generic/scope.h> +#include <util/generic/maybe.h> +#include <util/system/compiler.h> + +#include <cmath> + +namespace NYql::NJsonPath { + +using namespace NJson; +using namespace NUdf; +using namespace NDom; +using namespace NHyperscan; + +namespace { + +bool IsObjectOrArray(const TValue& value) { + return value.IsArray() || value.IsObject(); +} + +TIssue MakeError(TPosition pos, TIssueCode code, const TStringBuf message) { + TIssue error(pos, message); + error.SetCode(code, TSeverityIds::S_ERROR); + return error; +} + +TIssue MakeError(const TJsonPathItem& item, TIssueCode code, const TStringBuf message) { + return MakeError(item.Pos, code, message); +} + +} + +TResult::TResult(TJsonNodes&& nodes) + : Result(std::move(nodes)) +{ +} + +TResult::TResult(const TJsonNodes& nodes) + : Result(nodes) +{ +} + +TResult::TResult(TIssue&& issue) + : Result(std::move(issue)) +{ +} + +const TJsonNodes& TResult::GetNodes() const { + return std::get<TJsonNodes>(Result); +} + +TJsonNodes& TResult::GetNodes() { + return std::get<TJsonNodes>(Result); +} + 
+const TIssue& TResult::GetError() const { + return std::get<TIssue>(Result); +} + +bool TResult::IsError() const { + return std::holds_alternative<TIssue>(Result); +} + +TExecutor::TExecutor( + const TJsonPathPtr path, + const TJsonNodes& input, + const TVariablesMap& variables, + const IValueBuilder* valueBuilder) + : Reader(path) + , Input(input) + , Variables(variables) + , ValueBuilder(valueBuilder) +{ +} + +bool TExecutor::IsZero(double value) { + return -EPSILON <= value && value <= EPSILON; +} + +bool TExecutor::IsLess(double a, double b) { + return (b - a) > EPSILON; +} + +bool TExecutor::IsGreater(double a, double b) { + return (a - b) > EPSILON; +} + +bool TExecutor::IsEqual(double a, double b) { + return IsZero(a - b); +} + +bool TExecutor::IsStrict() const { + return Reader.GetMode() == EJsonPathMode::Strict; +} + +bool TExecutor::IsLax() const { + return Reader.GetMode() == EJsonPathMode::Lax; +} + +TResult TExecutor::Execute() { + return Execute(Reader.ReadFirst()); +} + +TResult TExecutor::Execute(const TJsonPathItem& item) { + switch (item.Type) { + case EJsonPathItemType::MemberAccess: + return MemberAccess(item); + case EJsonPathItemType::WildcardMemberAccess: + return WildcardMemberAccess(item); + case EJsonPathItemType::ContextObject: + return ContextObject(); + case EJsonPathItemType::Variable: + return Variable(item); + case EJsonPathItemType::NumberLiteral: + return NumberLiteral(item); + case EJsonPathItemType::ArrayAccess: + return ArrayAccess(item); + case EJsonPathItemType::WildcardArrayAccess: + return WildcardArrayAccess(item); + case EJsonPathItemType::LastArrayIndex: + return LastArrayIndex(item); + case EJsonPathItemType::UnaryMinus: + case EJsonPathItemType::UnaryPlus: + return UnaryArithmeticOp(item); + case EJsonPathItemType::BinaryAdd: + case EJsonPathItemType::BinarySubstract: + case EJsonPathItemType::BinaryMultiply: + case EJsonPathItemType::BinaryDivide: + case EJsonPathItemType::BinaryModulo: + return 
BinaryArithmeticOp(item); + case EJsonPathItemType::BinaryAnd: + case EJsonPathItemType::BinaryOr: + return BinaryLogicalOp(item); + case EJsonPathItemType::UnaryNot: + return UnaryLogicalOp(item); + case EJsonPathItemType::BooleanLiteral: + return BooleanLiteral(item); + case EJsonPathItemType::NullLiteral: + return NullLiteral(); + case EJsonPathItemType::StringLiteral: + return StringLiteral(item); + case EJsonPathItemType::FilterObject: + return FilterObject(item); + case EJsonPathItemType::FilterPredicate: + return FilterPredicate(item); + case EJsonPathItemType::BinaryLess: + case EJsonPathItemType::BinaryLessEqual: + case EJsonPathItemType::BinaryGreater: + case EJsonPathItemType::BinaryGreaterEqual: + case EJsonPathItemType::BinaryEqual: + case EJsonPathItemType::BinaryNotEqual: + return CompareOp(item); + case EJsonPathItemType::AbsMethod: + case EJsonPathItemType::FloorMethod: + case EJsonPathItemType::CeilingMethod: + return NumericMethod(item); + case EJsonPathItemType::DoubleMethod: + return DoubleMethod(item); + case EJsonPathItemType::TypeMethod: + return TypeMethod(item); + case EJsonPathItemType::SizeMethod: + return SizeMethod(item); + case EJsonPathItemType::KeyValueMethod: + return KeyValueMethod(item); + case EJsonPathItemType::StartsWithPredicate: + return StartsWithPredicate(item); + case EJsonPathItemType::IsUnknownPredicate: + return IsUnknownPredicate(item); + case EJsonPathItemType::ExistsPredicate: + return ExistsPredicate(item); + case EJsonPathItemType::LikeRegexPredicate: + return LikeRegexPredicate(item); + } +} + +TResult TExecutor::ContextObject() { + return Input; +} + +TResult TExecutor::Variable(const TJsonPathItem& item) { + const auto it = Variables.find(item.GetString()); + if (it == Variables.end()) { + return MakeError(item, TIssuesIds::JSONPATH_UNDEFINED_VARIABLE, TStringBuilder() << "Undefined variable '" << item.GetString() << "'"); + } + + return TJsonNodes({it->second}); +} + +TResult TExecutor::LastArrayIndex(const 
TJsonPathItem& item) { + if (ArraySubscriptSource.empty()) { + return MakeError(item, TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT, "'last' is only allowed inside array subscripts"); + } + + const auto& array = ArraySubscriptSource.top(); + const i64 arraySize = array.GetSize(); + + // NOTE: For empty arrays `last` equals `-1`. This is intended, PostgreSQL 12 has the same behaviour + return TJsonNodes({TValue(MakeDouble(static_cast<double>(arraySize - 1)))}); +} + +TResult TExecutor::NumberLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeDouble(item.GetNumber()))}); +} + +TResult TExecutor::MemberAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); + } else { + continue; + } + } + + if (const auto payload = node.Lookup(item.GetString())) { + result.push_back(*payload); + continue; + } + + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_MEMBER_NOT_FOUND, "Member not found"); + } + } + + return std::move(result); +} + +TResult TExecutor::WildcardMemberAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + if (IsStrict()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object"); + } else { + continue; + } + } + + TValue key; + TValue value; + auto it = node.GetObjectIterator(); + while (it.Next(key, value)) { + result.push_back(value); + } + } + + return std::move(result); +} + +TMaybe<TIssue> TExecutor::EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result) { + if (index.size() != 1) { + 
return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Expected single number item for array index"); + } + + const auto& indexValue = index[0]; + if (!indexValue.IsNumber()) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Array index must be number"); + } + + result = static_cast<i64>(std::floor(indexValue.GetNumber())); + return Nothing(); +} + +TMaybe<TIssue> TExecutor::EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result) { + for (const auto& subscript : item.GetSubscripts()) { + const auto& fromItem = Reader.ReadFromSubscript(subscript); + const auto fromResult = Execute(fromItem); + if (fromResult.IsError()) { + return fromResult.GetError(); + } + + i64 fromIndex = 0; + TMaybe<TIssue> error = EnsureSingleSubscript(fromItem.Pos, fromResult.GetNodes(), fromIndex); + if (error) { + return error; + } + + if (!subscript.IsRange()) { + result.emplace_back(fromIndex, fromItem.Pos); + continue; + } + + const auto& toItem = Reader.ReadToSubscript(subscript); + const auto toResult = Execute(toItem); + if (toResult.IsError()) { + return toResult.GetError(); + } + + i64 toIndex = 0; + error = EnsureSingleSubscript(toItem.Pos, toResult.GetNodes(), toIndex); + if (error) { + return error; + } + + result.emplace_back(fromIndex, fromItem.Pos, toIndex, toItem.Pos); + } + return Nothing(); +} + +TResult TExecutor::ArrayAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { + if (!node.IsArray()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); + } + + ArraySubscriptSource.push(node); + Y_DEFER { + ArraySubscriptSource.pop(); + }; + + // Check for "hard" errors in array subscripts. 
These are forbidden even in lax mode + // NOTE: We intentionally execute subscripts expressions for each array in the input + // because they can contain `last` keyword which value is different for each array + TVector<TArraySubscript> subscripts; + TMaybe<TIssue> error = EnsureArraySubscripts(item, subscripts); + if (error) { + return std::move(*error); + } + + const ui64 arraySize = node.GetSize(); + for (const auto& idx : subscripts) { + // Check bounds for first subscript + if (idx.GetFrom() < 0 || idx.GetFrom() >= static_cast<i64>(arraySize)) { + if (IsStrict()) { + return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); + } else { + continue; + } + } + + // If there is no second subcripts, just return corresponding array element + if (!idx.IsRange()) { + result.push_back(node.GetElement(idx.GetFrom())); + continue; + } + + // Check bounds for second subscript + if (idx.GetTo() < 0 || idx.GetTo() >= static_cast<i64>(arraySize)) { + if (IsStrict()) { + return MakeError(idx.GetToPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds"); + } else { + continue; + } + } + + // In strict mode invalid ranges are forbidden + if (idx.GetFrom() > idx.GetTo() && IsStrict()) { + return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE, "Range lower bound is greater than upper bound"); + } + + for (i64 i = idx.GetFrom(); i <= idx.GetTo(); i++) { + result.push_back(node.GetElement(i)); + } + } + } + return std::move(result); +} + +TResult TExecutor::WildcardArrayAccess(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) { + if (!node.IsArray()) { + return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array"); + } + + auto it = node.GetArrayIterator(); + TValue value; + while 
(it.Next(value)) { + result.push_back(value); + } + } + return std::move(result); +} + +TResult TExecutor::UnaryArithmeticOp(const TJsonPathItem& item) { + const auto& operandItem = Reader.ReadInput(item); + const auto operandsResult = Execute(operandItem); + if (operandsResult.IsError()) { + return operandsResult; + } + + const auto& operands = operandsResult.GetNodes(); + TJsonNodes result; + result.reserve(operands.size()); + for (const auto& operand : operands) { + if (!operand.IsNumber()) { + return MakeError( + operandItem, TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE, + TStringBuilder() << "Unsupported type for unary operations" + ); + } + + if (item.Type == EJsonPathItemType::UnaryPlus) { + result.push_back(operand); + continue; + } + + const auto value = operand.GetNumber(); + result.push_back(TValue(MakeDouble(-value))); + } + + return std::move(result); +} + +TMaybe<TIssue> TExecutor::EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result) { + if (nodes.size() != 1) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for binary operation"); + } + + const auto& value = nodes[0]; + if (!value.IsNumber()) { + return MakeError( + pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE, + TStringBuilder() << "Unsupported type for binary operations" + ); + } + + result = value.GetNumber(); + return Nothing(); +} + +TResult TExecutor::BinaryArithmeticOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return leftResult; + } + + double left = 0; + TMaybe<TIssue> error = EnsureBinaryArithmeticOpArgument(leftItem.Pos, leftResult.GetNodes(), left); + if (error) { + return std::move(*error); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + 
return rightResult; + } + + double right = 0; + error = EnsureBinaryArithmeticOpArgument(rightItem.Pos, rightResult.GetNodes(), right); + if (error) { + return std::move(*error); + } + + double result = 0; + switch (item.Type) { + case EJsonPathItemType::BinaryAdd: + result = left + right; + break; + case EJsonPathItemType::BinarySubstract: + result = left - right; + break; + case EJsonPathItemType::BinaryMultiply: + result = left * right; + break; + case EJsonPathItemType::BinaryDivide: + if (IsZero(right)) { + return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); + } + result = left / right; + break; + case EJsonPathItemType::BinaryModulo: + if (IsZero(right)) { + return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero"); + } + result = std::fmod(left, right); + break; + default: + YQL_ENSURE(false, "Expected binary arithmetic operation"); + } + + if (Y_UNLIKELY(std::isinf(result))) { + return MakeError(item, TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY, "Binary operation result is infinity"); + } + + return TJsonNodes({TValue(MakeDouble(result))}); +} + +TMaybe<TIssue> TExecutor::EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result) { + if (nodes.size() != 1) { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for logical operation"); + } + + const auto& value = nodes[0]; + if (value.IsNull()) { + result = Nothing(); + } else if (value.IsBool()) { + result = value.GetBool(); + } else { + return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Unsupported type for logical operation"); + } + + return Nothing(); +} + +TResult TExecutor::BinaryLogicalOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return leftResult; + } + + TMaybe<bool> left; + 
TMaybe<TIssue> error = EnsureLogicalOpArgument(leftItem.Pos, leftResult.GetNodes(), left); + if (error) { + return std::move(*error); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + return rightResult; + } + + TMaybe<bool> right; + error = EnsureLogicalOpArgument(rightItem.Pos, rightResult.GetNodes(), right); + if (error) { + return std::move(*error); + } + + switch (item.Type) { + case EJsonPathItemType::BinaryAnd: { + /* + AND truth table (taken from SQL JSON standard) + + | && | true | false | null | + | ----- | ----- | ----- | ----- | + | true | true | false | null | + | false | false | false | false | + | null | null | false | null | + */ + if (left.Defined() && right.Defined()) { + return TJsonNodes({TValue(MakeBool(*left && *right))}); + } + + const bool falseVsNull = !left.GetOrElse(true) && !right.Defined(); + const bool nullVsFalse = !right.GetOrElse(true) && !left.Defined(); + if (falseVsNull || nullVsFalse) { + return TJsonNodes({TValue(MakeBool(false))}); + } + return TJsonNodes({TValue(MakeEntity())}); + } + case EJsonPathItemType::BinaryOr: { + /* + OR truth table (taken from SQL JSON standard) + + | || | true | false | null | + | ----- | ----- | ----- | ----- | + | true | true | true | true | + | false | true | false | null | + | null | true | null | null | + */ + if (left.Defined() && right.Defined()) { + return TJsonNodes({TValue(MakeBool(*left || *right))}); + } + + const bool trueVsNull = left.GetOrElse(false) && !right.Defined(); + const bool nullVsTrue = right.GetOrElse(false) && !left.Defined(); + if (trueVsNull || nullVsTrue) { + return TJsonNodes({TValue(MakeBool(true))}); + } + return TJsonNodes({TValue(MakeEntity())}); + } + default: + YQL_ENSURE(false, "Expected binary logical operation"); + } +} + +TResult TExecutor::UnaryLogicalOp(const TJsonPathItem& item) { + /* + NOT truth table (taken from SQL JSON standard) + + | x | !x | + | ----- | 
----- | + | true | false | + | false | true | + | null | null | + */ + const auto& operandItem = Reader.ReadInput(item); + const auto operandResult = Execute(operandItem); + if (operandResult.IsError()) { + return operandResult; + } + + TMaybe<bool> operand; + TMaybe<TIssue> error = EnsureLogicalOpArgument(operandItem.Pos, operandResult.GetNodes(), operand); + if (error) { + return std::move(*error); + } + + if (!operand.Defined()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + return TJsonNodes({TValue(MakeBool(!(*operand)))}); +} + +TResult TExecutor::BooleanLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeBool(item.GetBoolean()))}); +} + +TResult TExecutor::NullLiteral() { + return TJsonNodes({TValue(MakeEntity())}); +} + +TResult TExecutor::StringLiteral(const TJsonPathItem& item) { + return TJsonNodes({TValue(MakeString(item.GetString(), ValueBuilder))}); +} + +TMaybe<bool> TExecutor::CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation) { + if (IsObjectOrArray(left) || IsObjectOrArray(right)) { + // Comparisons of objects and arrays are prohibited + return Nothing(); + } + + if (left.IsNull() && right.IsNull()) { + // null == null is true, but all other comparisons are false + return operation == EJsonPathItemType::BinaryEqual; + } + + if (left.IsNull() || right.IsNull()) { + // All operations between null and non-null are false + return false; + } + + auto doCompare = [&operation](const auto& left, const auto& right) { + switch (operation) { + case EJsonPathItemType::BinaryEqual: + return left == right; + case EJsonPathItemType::BinaryNotEqual: + return left != right; + case EJsonPathItemType::BinaryLess: + return left < right; + case EJsonPathItemType::BinaryLessEqual: + return left <= right; + case EJsonPathItemType::BinaryGreater: + return left > right; + case EJsonPathItemType::BinaryGreaterEqual: + return left >= right; + default: + YQL_ENSURE(false, "Expected compare operation"); + } + }; + + if 
(left.IsBool() && right.IsBool()) { + return doCompare(left.GetBool(), right.GetBool()); + } else if (left.IsString() && right.IsString()) { + // NOTE: Strings are compared as byte arrays. + // YQL does the same thing for UTF-8 strings and according to SQL/JSON + // standard JsonPath must use the same semantics. + // + // However this is not correct in logical meaning. Let us consider strings: + // - U+00e9 (LATIN SMALL LETTER E WITH ACUTE), 'é' + // - U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), `é` + // Even though these two strings are different byte sequences, they are identical + // from UTF-8 perspective. + return doCompare(left.GetString(), right.GetString()); + } + + if (!left.IsNumber() || !right.IsNumber()) { + return Nothing(); + } + + const auto leftNumber = left.GetNumber(); + const auto rightNumber = right.GetNumber(); + switch (operation) { + case EJsonPathItemType::BinaryEqual: + return IsEqual(leftNumber, rightNumber); + case EJsonPathItemType::BinaryNotEqual: + return !IsEqual(leftNumber, rightNumber); + case EJsonPathItemType::BinaryLess: + return IsLess(leftNumber, rightNumber); + case EJsonPathItemType::BinaryLessEqual: + return !IsGreater(leftNumber, rightNumber); + case EJsonPathItemType::BinaryGreater: + return IsGreater(leftNumber, rightNumber); + case EJsonPathItemType::BinaryGreaterEqual: + return !IsLess(leftNumber, rightNumber); + default: + YQL_ENSURE(false, "Expected compare operation"); + } +} + +TResult TExecutor::CompareOp(const TJsonPathItem& item) { + const auto& leftItem = Reader.ReadLeftOperand(item); + const auto leftResult = Execute(leftItem); + if (leftResult.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto& rightItem = Reader.ReadRightOperand(item); + const auto rightResult = Execute(rightItem); + if (rightResult.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto leftNodes = OptionalUnwrapArrays(leftResult.GetNodes()); + const auto rightNodes = 
OptionalUnwrapArrays(rightResult.GetNodes()); + bool error = false; + bool found = false; + for (const auto& left : leftNodes) { + for (const auto& right : rightNodes) { + const auto result = CompareValues(left, right, item.Type); + if (!result.Defined()) { + error = true; + } else { + found |= *result; + } + + if (IsLax() && (error || found)) { + break; + } + } + + if (IsLax() && (error || found)) { + break; + } + } + + if (error) { + return TJsonNodes({TValue(MakeEntity())}); + } + return TJsonNodes({TValue(MakeBool(found))}); +} + +TResult TExecutor::FilterObject(const TJsonPathItem& item) { + if (CurrentFilterObject.empty()) { + return MakeError(item, TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER, "'@' is only allowed inside filters"); + } + + return TJsonNodes({CurrentFilterObject.top()}); +} + +TResult TExecutor::FilterPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + const auto& predicateItem = Reader.ReadFilterPredicate(item); + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + CurrentFilterObject.push(node); + Y_DEFER { + CurrentFilterObject.pop(); + }; + + const auto predicateResult = Execute(predicateItem); + if (predicateResult.IsError()) { + continue; + } + + const auto& predicateNodes = predicateResult.GetNodes(); + if (predicateNodes.size() != 1) { + continue; + } + + const auto& value = predicateNodes[0]; + if (value.IsBool() && value.GetBool()) { + result.push_back(node); + continue; + } + } + return std::move(result); +} + +TResult TExecutor::NumericMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsNumber()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT, "Unsupported type for numeric method"); + 
} + + double applied = node.GetNumber(); + switch (item.Type) { + case EJsonPathItemType::AbsMethod: + applied = std::fabs(applied); + break; + case EJsonPathItemType::FloorMethod: + applied = std::floor(applied); + break; + case EJsonPathItemType::CeilingMethod: + applied = std::ceil(applied); + break; + default: + YQL_ENSURE(false, "Expected numeric method"); + } + result.push_back(TValue(MakeDouble(applied))); + } + return std::move(result); +} + +TResult TExecutor::DoubleMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsString()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, "Unsupported type for double() method"); + } + + const double parsed = ParseDouble(node.GetString()); + if (std::isnan(parsed)) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMBER_STRING, "Error parsing number from string"); + } + + if (std::isinf(parsed)) { + return MakeError(item, TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING, "Parsed number is infinity"); + } + + result.push_back(TValue(MakeDouble(parsed))); + } + return std::move(result); +} + +TResult TExecutor::TypeMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : input.GetNodes()) { + TStringBuf type; + switch (node.GetType()) { + case EValueType::Null: + type = "null"; + break; + case EValueType::Bool: + type = "boolean"; + break; + case EValueType::Number: + type = "number"; + break; + case EValueType::String: + type = "string"; + break; + case EValueType::Array: + type = "array"; + break; + case EValueType::Object: + type = "object"; + break; + } + result.push_back(TValue(MakeString(type, ValueBuilder))); + } + return std::move(result); +} + +TResult 
TExecutor::SizeMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + for (const auto& node : input.GetNodes()) { + ui64 size = 1; + if (node.IsArray()) { + size = node.GetSize(); + } + result.push_back(TValue(MakeDouble(static_cast<double>(size)))); + } + return std::move(result); +} + +TResult TExecutor::KeyValueMethod(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + TJsonNodes result; + TPair row[2]; + TPair& nameEntry = row[0]; + TPair& valueEntry = row[1]; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (!node.IsObject()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT, "Unsupported type for keyvalue() method"); + } + + TValue key; + TValue value; + auto it = node.GetObjectIterator(); + while (it.Next(key, value)) { + nameEntry.first = MakeString("name", ValueBuilder); + nameEntry.second = key.ConvertToUnboxedValue(ValueBuilder); + + valueEntry.first = MakeString("value", ValueBuilder); + valueEntry.second = value.ConvertToUnboxedValue(ValueBuilder); + + result.push_back(TValue(MakeDict(row, 2))); + } + } + return std::move(result); +} + +TResult TExecutor::StartsWithPredicate(const TJsonPathItem& item) { + const auto& input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& inputNodes = input.GetNodes(); + if (inputNodes.size() != 1) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Expected exactly 1 item as input argument for starts with predicate"); + } + + const auto& inputString = inputNodes[0]; + if (!inputString.IsString()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Type of input argument for starts with predicate must be string"); + } + + const auto prefix = Execute(Reader.ReadPrefix(item)); + if 
(prefix.IsError()) { + return prefix; + } + + bool error = false; + bool found = false; + for (const auto& node : prefix.GetNodes()) { + if (node.IsString()) { + found |= inputString.GetString().StartsWith(node.GetString()); + } else { + error = true; + } + + if (IsLax() && (found || error)) { + break; + } + } + + if (error) { + return TJsonNodes({TValue(MakeEntity())}); + } + return TJsonNodes({TValue(MakeBool(found))}); +} + +TResult TExecutor::IsUnknownPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& nodes = input.GetNodes(); + if (nodes.size() != 1) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "Expected exactly 1 item as an argument for is unknown predicate"); + } + + const auto& node = nodes[0]; + if (node.IsNull()) { + return TJsonNodes({TValue(MakeBool(true))}); + } + + if (!node.IsBool()) { + return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "is unknown predicate supports only bool and null types for its argument"); + } + return TJsonNodes({TValue(MakeBool(false))}); +} + +TResult TExecutor::ExistsPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return TJsonNodes({TValue(MakeEntity())}); + } + + const auto& nodes = input.GetNodes(); + return TJsonNodes({TValue(MakeBool(!nodes.empty()))}); +} + +TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) { + const auto input = Execute(Reader.ReadInput(item)); + if (input.IsError()) { + return input; + } + + const auto& regex = item.GetRegex(); + bool error = false; + bool found = false; + for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) { + if (node.IsString()) { + found |= Matches(regex.Regex, regex.Scratch, node.GetString()); + } else { + error = true; + } + + if (IsLax() && (found || error)) { + break; + } + } + + if (error) { + return 
TJsonNodes({TValue(MakeEntity())}); + } + return TJsonNodes({TValue(MakeBool(found))}); +} + +TJsonNodes TExecutor::OptionalUnwrapArrays(const TJsonNodes& input) { + if (IsStrict()) { + return input; + } + + TJsonNodes result; + for (const auto& node : input) { + if (!node.IsArray()) { + result.push_back(node); + continue; + } + + auto it = node.GetArrayIterator(); + TValue value; + while (it.Next(value)) { + result.push_back(value); + } + } + return result; +} + +TJsonNodes TExecutor::OptionalArrayWrapNodes(const TJsonNodes& input) { + if (IsStrict()) { + return input; + } + + TJsonNodes result; + for (const auto& node : input) { + if (node.IsArray()) { + result.push_back(node); + continue; + } + + TUnboxedValue nodeCopy(node.ConvertToUnboxedValue(ValueBuilder)); + result.push_back(TValue(MakeList(&nodeCopy, 1, ValueBuilder))); + } + return result; +} + } diff --git a/ydb/library/yql/minikql/jsonpath/executor.h b/ydb/library/yql/minikql/jsonpath/executor.h index 782ab1818d..52d6204a74 100644 --- a/ydb/library/yql/minikql/jsonpath/executor.h +++ b/ydb/library/yql/minikql/jsonpath/executor.h @@ -1,198 +1,198 @@ -#pragma once - -#include "binary.h" -#include "value.h" - +#pragma once + +#include "binary.h" +#include "value.h" + #include <ydb/library/yql/public/issue/yql_issue.h> #include <ydb/library/yql/utils/yql_panic.h> #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_value_builder.h> #include <ydb/library/yql/public/udf/udf_allocator.h> - + #include <library/cpp/json/json_value.h> #include <library/cpp/containers/stack_vector/stack_vec.h> - -#include <util/generic/ptr.h> -#include <util/generic/stack.h> -#include <util/generic/hash.h> -#include <util/generic/maybe.h> - -#include <variant> - -namespace NYql::NJsonPath { - -using TJsonNodes = TSmallVec<TValue>; - -class TResult { -public: - TResult(TJsonNodes&& nodes); - - TResult(const TJsonNodes& nodes); - - TResult(TIssue&& issue); - - const TJsonNodes& GetNodes() const; 
- - TJsonNodes& GetNodes(); - - const TIssue& GetError() const; - - bool IsError() const; - -private: - std::variant<TJsonNodes, TIssue> Result; -}; - -class TArraySubscript { -public: - TArraySubscript(i64 from, TPosition fromPos) - : From(from) - , FromPos(fromPos) - , HasTo(false) - { - } - - TArraySubscript(i64 from, TPosition fromPos, i64 to, TPosition toPos) - : From(from) - , FromPos(fromPos) - , To(to) - , ToPos(toPos) - , HasTo(true) - { - } - - i64 GetFrom() const { - return From; - } - - TPosition GetFromPos() const { - return FromPos; - } - - i64 GetTo() const { - YQL_ENSURE(IsRange()); - return To; - } - - TPosition GetToPos() const { - return ToPos; - } - - bool IsRange() const { - return HasTo; - } - -private: + +#include <util/generic/ptr.h> +#include <util/generic/stack.h> +#include <util/generic/hash.h> +#include <util/generic/maybe.h> + +#include <variant> + +namespace NYql::NJsonPath { + +using TJsonNodes = TSmallVec<TValue>; + +class TResult { +public: + TResult(TJsonNodes&& nodes); + + TResult(const TJsonNodes& nodes); + + TResult(TIssue&& issue); + + const TJsonNodes& GetNodes() const; + + TJsonNodes& GetNodes(); + + const TIssue& GetError() const; + + bool IsError() const; + +private: + std::variant<TJsonNodes, TIssue> Result; +}; + +class TArraySubscript { +public: + TArraySubscript(i64 from, TPosition fromPos) + : From(from) + , FromPos(fromPos) + , HasTo(false) + { + } + + TArraySubscript(i64 from, TPosition fromPos, i64 to, TPosition toPos) + : From(from) + , FromPos(fromPos) + , To(to) + , ToPos(toPos) + , HasTo(true) + { + } + + i64 GetFrom() const { + return From; + } + + TPosition GetFromPos() const { + return FromPos; + } + + i64 GetTo() const { + YQL_ENSURE(IsRange()); + return To; + } + + TPosition GetToPos() const { + return ToPos; + } + + bool IsRange() const { + return HasTo; + } + +private: i64 From = 0; - TPosition FromPos; + TPosition FromPos; i64 To = 0; - TPosition ToPos; - bool HasTo; -}; - -using TVariablesMap = 
THashMap<TString, TValue>; - -class TExecutor { -public: - TExecutor( - const TJsonPathPtr path, - const TJsonNodes& input, - const TVariablesMap& variables, - const NUdf::IValueBuilder* valueBuilder); - - TResult Execute(); - -private: - constexpr static double EPSILON = 1e-20; - - static bool IsZero(double value); - - static bool IsEqual(double a, double b); - - static bool IsLess(double a, double b); - - static bool IsGreater(double a, double b); - - bool IsStrict() const; - - bool IsLax() const; - - TResult Execute(const TJsonPathItem& item); - - TResult ContextObject(); - - TResult Variable(const TJsonPathItem& item); - - TResult LastArrayIndex(const TJsonPathItem& item); - - TResult NumberLiteral(const TJsonPathItem& item); - - TResult MemberAccess(const TJsonPathItem& item); - - TResult WildcardMemberAccess(const TJsonPathItem& item); - - TMaybe<TIssue> EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result); - - TMaybe<TIssue> EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result); - - TResult ArrayAccess(const TJsonPathItem& item); - - TResult WildcardArrayAccess(const TJsonPathItem& item); - - TResult UnaryArithmeticOp(const TJsonPathItem& item); - - TMaybe<TIssue> EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result); - - TResult BinaryArithmeticOp(const TJsonPathItem& item); - - TMaybe<TIssue> EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result); - - TResult BinaryLogicalOp(const TJsonPathItem& item); - - TResult UnaryLogicalOp(const TJsonPathItem& item); - - TResult BooleanLiteral(const TJsonPathItem& item); - - TResult NullLiteral(); - - TResult StringLiteral(const TJsonPathItem& item); - - TMaybe<bool> CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation); - - TResult CompareOp(const TJsonPathItem& item); - - TResult FilterObject(const TJsonPathItem& item); - - TResult FilterPredicate(const TJsonPathItem& 
item); - - TResult NumericMethod(const TJsonPathItem& item); - - TResult DoubleMethod(const TJsonPathItem& item); - - TResult TypeMethod(const TJsonPathItem& item); - - TResult SizeMethod(const TJsonPathItem& item); - - TResult KeyValueMethod(const TJsonPathItem& item); - - TResult StartsWithPredicate(const TJsonPathItem& item); - - TResult IsUnknownPredicate(const TJsonPathItem& item); - - TResult ExistsPredicate(const TJsonPathItem& item); - - TResult LikeRegexPredicate(const TJsonPathItem& item); - - TJsonNodes OptionalUnwrapArrays(const TJsonNodes& input); - - TJsonNodes OptionalArrayWrapNodes(const TJsonNodes& input); - - TStack<TValue> ArraySubscriptSource; - TStack<TValue> CurrentFilterObject; - TJsonPathReader Reader; - TJsonNodes Input; - const TVariablesMap& Variables; - const NUdf::IValueBuilder* ValueBuilder; -}; - -} + TPosition ToPos; + bool HasTo; +}; + +using TVariablesMap = THashMap<TString, TValue>; + +class TExecutor { +public: + TExecutor( + const TJsonPathPtr path, + const TJsonNodes& input, + const TVariablesMap& variables, + const NUdf::IValueBuilder* valueBuilder); + + TResult Execute(); + +private: + constexpr static double EPSILON = 1e-20; + + static bool IsZero(double value); + + static bool IsEqual(double a, double b); + + static bool IsLess(double a, double b); + + static bool IsGreater(double a, double b); + + bool IsStrict() const; + + bool IsLax() const; + + TResult Execute(const TJsonPathItem& item); + + TResult ContextObject(); + + TResult Variable(const TJsonPathItem& item); + + TResult LastArrayIndex(const TJsonPathItem& item); + + TResult NumberLiteral(const TJsonPathItem& item); + + TResult MemberAccess(const TJsonPathItem& item); + + TResult WildcardMemberAccess(const TJsonPathItem& item); + + TMaybe<TIssue> EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result); + + TMaybe<TIssue> EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result); + + TResult ArrayAccess(const 
TJsonPathItem& item); + + TResult WildcardArrayAccess(const TJsonPathItem& item); + + TResult UnaryArithmeticOp(const TJsonPathItem& item); + + TMaybe<TIssue> EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result); + + TResult BinaryArithmeticOp(const TJsonPathItem& item); + + TMaybe<TIssue> EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result); + + TResult BinaryLogicalOp(const TJsonPathItem& item); + + TResult UnaryLogicalOp(const TJsonPathItem& item); + + TResult BooleanLiteral(const TJsonPathItem& item); + + TResult NullLiteral(); + + TResult StringLiteral(const TJsonPathItem& item); + + TMaybe<bool> CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation); + + TResult CompareOp(const TJsonPathItem& item); + + TResult FilterObject(const TJsonPathItem& item); + + TResult FilterPredicate(const TJsonPathItem& item); + + TResult NumericMethod(const TJsonPathItem& item); + + TResult DoubleMethod(const TJsonPathItem& item); + + TResult TypeMethod(const TJsonPathItem& item); + + TResult SizeMethod(const TJsonPathItem& item); + + TResult KeyValueMethod(const TJsonPathItem& item); + + TResult StartsWithPredicate(const TJsonPathItem& item); + + TResult IsUnknownPredicate(const TJsonPathItem& item); + + TResult ExistsPredicate(const TJsonPathItem& item); + + TResult LikeRegexPredicate(const TJsonPathItem& item); + + TJsonNodes OptionalUnwrapArrays(const TJsonNodes& input); + + TJsonNodes OptionalArrayWrapNodes(const TJsonNodes& input); + + TStack<TValue> ArraySubscriptSource; + TStack<TValue> CurrentFilterObject; + TJsonPathReader Reader; + TJsonNodes Input; + const TVariablesMap& Variables; + const NUdf::IValueBuilder* ValueBuilder; +}; + +} diff --git a/ydb/library/yql/minikql/jsonpath/jsonpath.cpp b/ydb/library/yql/minikql/jsonpath/jsonpath.cpp index 985ea72c36..da4682a1e8 100644 --- a/ydb/library/yql/minikql/jsonpath/jsonpath.cpp +++ 
b/ydb/library/yql/minikql/jsonpath/jsonpath.cpp @@ -1,129 +1,129 @@ -#include "jsonpath.h" - -#include "binary.h" -#include "ast_builder.h" -#include "executor.h" -#include "type_check.h" -#include "value.h" - +#include "jsonpath.h" + +#include "binary.h" +#include "ast_builder.h" +#include "executor.h" +#include "type_check.h" +#include "value.h" + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> #include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathLexer.h> #include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathParser.h> #include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h> #include <ydb/library/yql/parser/proto_ast/proto_ast.h> - + #include <google/protobuf/message.h> - -#include <util/string/strip.h> - -#if defined(_tsan_enabled_) -#include <util/system/mutex.h> -#endif - -using namespace NYql; -using namespace NYql::NUdf; -using namespace NJson; - -namespace { - -#if defined(_tsan_enabled_) -TMutex SanitizerJsonPathTranslationMutex; -#endif - -class TParseErrorsCollector : public NProtoAST::IErrorCollector { -public: - TParseErrorsCollector(TIssues& issues, size_t maxErrors) - : IErrorCollector(maxErrors) - , Issues(issues) - { - } - -private: - void AddError(ui32 line, ui32 column, const TString& message) override { - Issues.AddIssue(TPosition(column, line, "jsonpath"), StripString(message)); - Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); - } - - TIssues& Issues; -}; - -} - -namespace NYql::NJsonPath { - -const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { - if (!IsUtf(path)) { - issues.AddIssue(TPosition(1, 1, "jsonpath"), "JsonPath must be UTF-8 encoded string"); - issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); - return {}; - } - - google::protobuf::Arena arena; - const google::protobuf::Message* rawAst; - { - #if defined(_tsan_enabled_) - TGuard<TMutex> 
guard(SanitizerJsonPathTranslationMutex); - #endif + +#include <util/string/strip.h> + +#if defined(_tsan_enabled_) +#include <util/system/mutex.h> +#endif + +using namespace NYql; +using namespace NYql::NUdf; +using namespace NJson; + +namespace { + +#if defined(_tsan_enabled_) +TMutex SanitizerJsonPathTranslationMutex; +#endif + +class TParseErrorsCollector : public NProtoAST::IErrorCollector { +public: + TParseErrorsCollector(TIssues& issues, size_t maxErrors) + : IErrorCollector(maxErrors) + , Issues(issues) + { + } + +private: + void AddError(ui32 line, ui32 column, const TString& message) override { + Issues.AddIssue(TPosition(column, line, "jsonpath"), StripString(message)); + Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); + } + + TIssues& Issues; +}; + +} + +namespace NYql::NJsonPath { + +const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { + if (!IsUtf(path)) { + issues.AddIssue(TPosition(1, 1, "jsonpath"), "JsonPath must be UTF-8 encoded string"); + issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); + return {}; + } + + google::protobuf::Arena arena; + const google::protobuf::Message* rawAst; + { + #if defined(_tsan_enabled_) + TGuard<TMutex> guard(SanitizerJsonPathTranslationMutex); + #endif NProtoAST::TProtoASTBuilder<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena); - TParseErrorsCollector collector(issues, maxParseErrors); - rawAst = builder.BuildAST(collector); - } - - if (rawAst == nullptr) { - return nullptr; - } - - const google::protobuf::Descriptor* descriptor = rawAst->GetDescriptor(); - if (descriptor && descriptor->name() != "TJsonPathParserAST") { - return nullptr; - } - - const auto* protoAst = static_cast<const NJsonPathGenerated::TJsonPathParserAST*>(rawAst); - TAstBuilder astBuilder(issues); - TAstNodePtr ast = astBuilder.Build(*protoAst); - if (!issues.Empty()) { - return nullptr; - } - - // At this 
point AST is guaranteed to be valid. We return it even if - // type checker finds some logical errors. - TJsonPathTypeChecker checker(issues); - ast->Accept(checker); - return ast; -} - -const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast) { - TJsonPathBuilder builder; - ast->Accept(builder); - return builder.ShrinkAndGetResult(); -} - -const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { - const auto ast = ParseJsonPathAst(path, issues, maxParseErrors); - if (!issues.Empty()) { - return {}; - } - return PackBinaryJsonPath(ast); -} - -TResult ExecuteJsonPath( - const TJsonPathPtr jsonPath, - const TValue& json, - const TVariablesMap& variables, - const NUdf::IValueBuilder* valueBuilder) { - TExecutor executor(jsonPath, {json}, variables, valueBuilder); - return executor.Execute(); -} - -TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict) { - TVariablesMap variables; - TUnboxedValue key; - TUnboxedValue payload; - auto it = dict.GetDictIterator(); - while (it.NextPair(key, payload)) { - variables[key.AsStringRef()] = TValue(payload); - } - return variables; -} - -} + TParseErrorsCollector collector(issues, maxParseErrors); + rawAst = builder.BuildAST(collector); + } + + if (rawAst == nullptr) { + return nullptr; + } + + const google::protobuf::Descriptor* descriptor = rawAst->GetDescriptor(); + if (descriptor && descriptor->name() != "TJsonPathParserAST") { + return nullptr; + } + + const auto* protoAst = static_cast<const NJsonPathGenerated::TJsonPathParserAST*>(rawAst); + TAstBuilder astBuilder(issues); + TAstNodePtr ast = astBuilder.Build(*protoAst); + if (!issues.Empty()) { + return nullptr; + } + + // At this point AST is guaranteed to be valid. We return it even if + // type checker finds some logical errors. 
+ TJsonPathTypeChecker checker(issues); + ast->Accept(checker); + return ast; +} + +const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast) { + TJsonPathBuilder builder; + ast->Accept(builder); + return builder.ShrinkAndGetResult(); +} + +const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors) { + const auto ast = ParseJsonPathAst(path, issues, maxParseErrors); + if (!issues.Empty()) { + return {}; + } + return PackBinaryJsonPath(ast); +} + +TResult ExecuteJsonPath( + const TJsonPathPtr jsonPath, + const TValue& json, + const TVariablesMap& variables, + const NUdf::IValueBuilder* valueBuilder) { + TExecutor executor(jsonPath, {json}, variables, valueBuilder); + return executor.Execute(); +} + +TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict) { + TVariablesMap variables; + TUnboxedValue key; + TUnboxedValue payload; + auto it = dict.GetDictIterator(); + while (it.NextPair(key, payload)) { + variables[key.AsStringRef()] = TValue(payload); + } + return variables; +} + +} diff --git a/ydb/library/yql/minikql/jsonpath/jsonpath.h b/ydb/library/yql/minikql/jsonpath/jsonpath.h index 35c27e99b6..0d41633aa6 100644 --- a/ydb/library/yql/minikql/jsonpath/jsonpath.h +++ b/ydb/library/yql/minikql/jsonpath/jsonpath.h @@ -1,24 +1,24 @@ -#pragma once - -#include "executor.h" - +#pragma once + +#include "executor.h" + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_value_builder.h> - -namespace NYql::NJsonPath { - -const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors); - -const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast, TIssues& issues); - -const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors); - -TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict); - -TResult ExecuteJsonPath( - const TJsonPathPtr jsonPath, - const TValue& json, - const TVariablesMap& variables, - const 
NUdf::IValueBuilder* valueBuilder); - -} + +namespace NYql::NJsonPath { + +const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors); + +const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast, TIssues& issues); + +const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors); + +TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict); + +TResult ExecuteJsonPath( + const TJsonPathPtr jsonPath, + const TValue& json, + const TVariablesMap& variables, + const NUdf::IValueBuilder* valueBuilder); + +} diff --git a/ydb/library/yql/minikql/jsonpath/parse_double.cpp b/ydb/library/yql/minikql/jsonpath/parse_double.cpp index ebf2ff48f7..e9bde573dc 100644 --- a/ydb/library/yql/minikql/jsonpath/parse_double.cpp +++ b/ydb/library/yql/minikql/jsonpath/parse_double.cpp @@ -1,33 +1,33 @@ - -#include "parse_double.h" - -#include <contrib/libs/double-conversion/double-conversion.h> - + +#include "parse_double.h" + +#include <contrib/libs/double-conversion/double-conversion.h> + #include <cmath> -namespace NYql::NJsonPath { - -using double_conversion::StringToDoubleConverter; - -double ParseDouble(const TStringBuf literal) { - // FromString<double> from util/string/cast.h is permissive to junk in string. - // In our case junk in string means bug in grammar. 
- // See https://a.yandex-team.ru/arc/trunk/arcadia/util/string/cast.cpp?rev=6456750#L692 - struct TStringToNumberConverter: public StringToDoubleConverter { - inline TStringToNumberConverter() - : StringToDoubleConverter( - NO_FLAGS, - /* empty_string_value */ 0.0, - /* junk_string_value */ NAN, - /* infinity_symbol */ nullptr, - /* nan_symbol */ nullptr - ) - { - } - }; - - int parsedCharactersCount = 0; - return Singleton<TStringToNumberConverter>()->StringToDouble(literal.data(), literal.length(), &parsedCharactersCount); -} - +namespace NYql::NJsonPath { + +using double_conversion::StringToDoubleConverter; + +double ParseDouble(const TStringBuf literal) { + // FromString<double> from util/string/cast.h is permissive to junk in string. + // In our case junk in string means bug in grammar. + // See https://a.yandex-team.ru/arc/trunk/arcadia/util/string/cast.cpp?rev=6456750#L692 + struct TStringToNumberConverter: public StringToDoubleConverter { + inline TStringToNumberConverter() + : StringToDoubleConverter( + NO_FLAGS, + /* empty_string_value */ 0.0, + /* junk_string_value */ NAN, + /* infinity_symbol */ nullptr, + /* nan_symbol */ nullptr + ) + { + } + }; + + int parsedCharactersCount = 0; + return Singleton<TStringToNumberConverter>()->StringToDouble(literal.data(), literal.length(), &parsedCharactersCount); +} + } diff --git a/ydb/library/yql/minikql/jsonpath/parse_double.h b/ydb/library/yql/minikql/jsonpath/parse_double.h index 8481bf7e82..e3c943d930 100644 --- a/ydb/library/yql/minikql/jsonpath/parse_double.h +++ b/ydb/library/yql/minikql/jsonpath/parse_double.h @@ -1,10 +1,10 @@ -#include <util/generic/string.h> - -namespace NYql::NJsonPath { - -// Parses double literal. Respects exponential format like `-23.5e-10`. -// On parsing error returns NaN double value (can be checked using `std::isnan`). -// On double overflow returns INF double value (can be checked using `std::isinf`). -double ParseDouble(const TStringBuf literal); - -}
\ No newline at end of file +#include <util/generic/string.h> + +namespace NYql::NJsonPath { + +// Parses double literal. Respects exponential format like `-23.5e-10`. +// On parsing error returns NaN double value (can be checked using `std::isnan`). +// On double overflow returns INF double value (can be checked using `std::isinf`). +double ParseDouble(const TStringBuf literal); + +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/type_check.cpp b/ydb/library/yql/minikql/jsonpath/type_check.cpp index dc87f44c92..1018307baf 100644 --- a/ydb/library/yql/minikql/jsonpath/type_check.cpp +++ b/ydb/library/yql/minikql/jsonpath/type_check.cpp @@ -1,132 +1,132 @@ -#include "type_check.h" - +#include "type_check.h" + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> - -namespace NYql::NJsonPath { - -TJsonPathTypeChecker::TJsonPathTypeChecker(TIssues& issues) - : Issues(issues) -{ -} - -void TJsonPathTypeChecker::VisitRoot(const TRootNode& node) { - node.GetExpr()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitContextObject(const TContextObjectNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitVariable(const TVariableNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitLastArrayIndex(const TLastArrayIndexNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitNumberLiteral(const TNumberLiteralNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitMemberAccess(const TMemberAccessNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitArrayAccess(const TArrayAccessNode& node) { - node.GetInput()->Accept(*this); - - for (const auto& subscript : node.GetSubscripts()) { - subscript.From->Accept(*this); - if (subscript.To) { - subscript.To->Accept(*this); - } - } -} - -void TJsonPathTypeChecker::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitUnaryOperation(const TUnaryOperationNode& node) { - if (node.GetOp() == EUnaryOperation::Not && node.GetExpr()->GetReturnType() != EReturnType::Bool) { - Error(node.GetExpr(), "Logical not needs boolean argument"); - } - - node.GetExpr()->Accept(*this); -} - -void 
TJsonPathTypeChecker::VisitBinaryOperation(const TBinaryOperationNode& node) { - if (node.GetOp() == EBinaryOperation::And || node.GetOp() == EBinaryOperation::Or) { - if (node.GetLeftExpr()->GetReturnType() != EReturnType::Bool) { - Error(node.GetLeftExpr(), "Left argument of logical operation needs to be boolean"); - } - if (node.GetRightExpr()->GetReturnType() != EReturnType::Bool) { - Error(node.GetRightExpr(), "Right argument of logical operation needs to be boolean"); - } - } - - node.GetLeftExpr()->Accept(*this); - node.GetRightExpr()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitBooleanLiteral(const TBooleanLiteralNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitNullLiteral(const TNullLiteralNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitStringLiteral(const TStringLiteralNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitFilterObject(const TFilterObjectNode& node) { - Y_UNUSED(node); -} - -void TJsonPathTypeChecker::VisitFilterPredicate(const TFilterPredicateNode& node) { - node.GetInput()->Accept(*this); - - if (node.GetPredicate()->GetReturnType() != EReturnType::Bool) { - Error(node.GetPredicate(), "Filter must return boolean value"); - } - - node.GetPredicate()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitMethodCall(const TMethodCallNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { - node.GetInput()->Accept(*this); - node.GetPrefix()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitExistsPredicate(const TExistsPredicateNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { - if (node.GetInput()->GetReturnType() != EReturnType::Bool) { - Error(node.GetInput(), "is unknown predicate expectes boolean argument"); - } - node.GetInput()->Accept(*this); -} - -void 
TJsonPathTypeChecker::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { - node.GetInput()->Accept(*this); -} - -void TJsonPathTypeChecker::Error(const TAstNodePtr node, const TStringBuf message) { - Issues.AddIssue(node->GetPos(), message); - Issues.back().SetCode(TIssuesIds::JSONPATH_TYPE_CHECK_ERROR, TSeverityIds::S_ERROR); -} - + +namespace NYql::NJsonPath { + +TJsonPathTypeChecker::TJsonPathTypeChecker(TIssues& issues) + : Issues(issues) +{ +} + +void TJsonPathTypeChecker::VisitRoot(const TRootNode& node) { + node.GetExpr()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitContextObject(const TContextObjectNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitVariable(const TVariableNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitLastArrayIndex(const TLastArrayIndexNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitNumberLiteral(const TNumberLiteralNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitMemberAccess(const TMemberAccessNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitArrayAccess(const TArrayAccessNode& node) { + node.GetInput()->Accept(*this); + + for (const auto& subscript : node.GetSubscripts()) { + subscript.From->Accept(*this); + if (subscript.To) { + subscript.To->Accept(*this); + } + } +} + +void TJsonPathTypeChecker::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitUnaryOperation(const TUnaryOperationNode& node) { + if (node.GetOp() == EUnaryOperation::Not && node.GetExpr()->GetReturnType() != EReturnType::Bool) { + Error(node.GetExpr(), "Logical not needs boolean argument"); + } + + node.GetExpr()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitBinaryOperation(const TBinaryOperationNode& 
node) { + if (node.GetOp() == EBinaryOperation::And || node.GetOp() == EBinaryOperation::Or) { + if (node.GetLeftExpr()->GetReturnType() != EReturnType::Bool) { + Error(node.GetLeftExpr(), "Left argument of logical operation needs to be boolean"); + } + if (node.GetRightExpr()->GetReturnType() != EReturnType::Bool) { + Error(node.GetRightExpr(), "Right argument of logical operation needs to be boolean"); + } + } + + node.GetLeftExpr()->Accept(*this); + node.GetRightExpr()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitBooleanLiteral(const TBooleanLiteralNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitNullLiteral(const TNullLiteralNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitStringLiteral(const TStringLiteralNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitFilterObject(const TFilterObjectNode& node) { + Y_UNUSED(node); +} + +void TJsonPathTypeChecker::VisitFilterPredicate(const TFilterPredicateNode& node) { + node.GetInput()->Accept(*this); + + if (node.GetPredicate()->GetReturnType() != EReturnType::Bool) { + Error(node.GetPredicate(), "Filter must return boolean value"); + } + + node.GetPredicate()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitMethodCall(const TMethodCallNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) { + node.GetInput()->Accept(*this); + node.GetPrefix()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitExistsPredicate(const TExistsPredicateNode& node) { + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) { + if (node.GetInput()->GetReturnType() != EReturnType::Bool) { + Error(node.GetInput(), "is unknown predicate expectes boolean argument"); + } + node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) { + 
node.GetInput()->Accept(*this); +} + +void TJsonPathTypeChecker::Error(const TAstNodePtr node, const TStringBuf message) { + Issues.AddIssue(node->GetPos(), message); + Issues.back().SetCode(TIssuesIds::JSONPATH_TYPE_CHECK_ERROR, TSeverityIds::S_ERROR); +} + } diff --git a/ydb/library/yql/minikql/jsonpath/type_check.h b/ydb/library/yql/minikql/jsonpath/type_check.h index 0a02828a6e..98d32968a1 100644 --- a/ydb/library/yql/minikql/jsonpath/type_check.h +++ b/ydb/library/yql/minikql/jsonpath/type_check.h @@ -1,59 +1,59 @@ -#pragma once - -#include "ast_nodes.h" - -namespace NYql::NJsonPath { - -class TJsonPathTypeChecker : public IAstNodeVisitor { -public: - TJsonPathTypeChecker(TIssues& Issues); - - void VisitRoot(const TRootNode& node) override; - - void VisitContextObject(const TContextObjectNode& node) override; - - void VisitVariable(const TVariableNode& node) override; - - void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; - - void VisitNumberLiteral(const TNumberLiteralNode& node) override; - - void VisitMemberAccess(const TMemberAccessNode& node) override; - - void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; - - void VisitArrayAccess(const TArrayAccessNode& node) override; - - void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; - - void VisitUnaryOperation(const TUnaryOperationNode& node) override; - - void VisitBinaryOperation(const TBinaryOperationNode& node) override; - - void VisitBooleanLiteral(const TBooleanLiteralNode& node) override; - - void VisitNullLiteral(const TNullLiteralNode& node) override; - - void VisitStringLiteral(const TStringLiteralNode& node) override; - - void VisitFilterObject(const TFilterObjectNode& node) override; - - void VisitFilterPredicate(const TFilterPredicateNode& node) override; - - void VisitMethodCall(const TMethodCallNode& node) override; - - void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override; - - void 
VisitExistsPredicate(const TExistsPredicateNode& node) override; - - void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override; - - void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override; - - void Error(const TAstNodePtr node, const TStringBuf message); - -private: - TIssues& Issues; -}; - -}
\ No newline at end of file +#pragma once + +#include "ast_nodes.h" + +namespace NYql::NJsonPath { + +class TJsonPathTypeChecker : public IAstNodeVisitor { +public: + TJsonPathTypeChecker(TIssues& Issues); + + void VisitRoot(const TRootNode& node) override; + + void VisitContextObject(const TContextObjectNode& node) override; + + void VisitVariable(const TVariableNode& node) override; + + void VisitLastArrayIndex(const TLastArrayIndexNode& node) override; + + void VisitNumberLiteral(const TNumberLiteralNode& node) override; + + void VisitMemberAccess(const TMemberAccessNode& node) override; + + void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override; + + void VisitArrayAccess(const TArrayAccessNode& node) override; + + void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override; + + void VisitUnaryOperation(const TUnaryOperationNode& node) override; + + void VisitBinaryOperation(const TBinaryOperationNode& node) override; + + void VisitBooleanLiteral(const TBooleanLiteralNode& node) override; + + void VisitNullLiteral(const TNullLiteralNode& node) override; + + void VisitStringLiteral(const TStringLiteralNode& node) override; + + void VisitFilterObject(const TFilterObjectNode& node) override; + + void VisitFilterPredicate(const TFilterPredicateNode& node) override; + + void VisitMethodCall(const TMethodCallNode& node) override; + + void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override; + + void VisitExistsPredicate(const TExistsPredicateNode& node) override; + + void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override; + + void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override; + + void Error(const TAstNodePtr node, const TStringBuf message); + +private: + TIssues& Issues; +}; + +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp index 6999564f71..121e47767c 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp @@ -1,968 +1,968 @@ -#include "test_base.h" - -#include <util/string/builder.h> - -#include <cmath> - -class TJsonPathCommonTest : public TJsonPathTestBase { -public: - TJsonPathCommonTest() - : TJsonPathTestBase() - { - } - - UNIT_TEST_SUITE(TJsonPathCommonTest); - UNIT_TEST(TestPrimary); - UNIT_TEST(TestMemberAccess); - UNIT_TEST(TestWildcardMemberAccess); - UNIT_TEST(TestArrayAccess); - UNIT_TEST(TestLastArrayIndex); - UNIT_TEST(TestLastArrayIndexInvalid); - UNIT_TEST(TestNonIntegerArrayIndex); - UNIT_TEST(TestWildcardArrayAccess); - UNIT_TEST(TestUnaryOperations); - UNIT_TEST(TestUnaryOperationsErrors); - UNIT_TEST(TestBinaryArithmeticOperations); - UNIT_TEST(TestBinaryArithmeticOperationsErrors); - UNIT_TEST(TestParseErrors); - UNIT_TEST(TestVariables); - UNIT_TEST(TestDivisionByZero); - UNIT_TEST(TestInfinityResult); - UNIT_TEST(TestLogicalOperations); - UNIT_TEST(TestCompareOperations); - UNIT_TEST(TestFilter); - UNIT_TEST(TestFilterInvalid); - UNIT_TEST(TestNumericMethods); - UNIT_TEST(TestNumericMethodsErrors); - UNIT_TEST(TestDoubleMethod); - UNIT_TEST(TestDoubleMethodErrors); - UNIT_TEST(TestTypeMethod); - UNIT_TEST(TestSizeMethod); - UNIT_TEST(TestKeyValueMethod); - UNIT_TEST(TestKeyValueMethodErrors); - UNIT_TEST(TestStartsWithPredicate); - UNIT_TEST(TestStartsWithPredicateErrors); - UNIT_TEST(TestExistsPredicate); - UNIT_TEST(TestIsUnknownPredicate); - UNIT_TEST(TestLikeRegexPredicate); - UNIT_TEST_SUITE_END(); - - void TestPrimary() { - const TVector<TMultiOutputTestCase> testCases = { - // Context object $ must return whole JSON when used alone - {R"({"key": 123})", "$", {R"({"key":123})"}}, - {R"([1, 2, 3])", "$", {R"([1,2,3])"}}, - {"1.234", "$", {"1.234"}}, - 
{R"("some string")", "$", {R"("some string")"}}, - - // Literal must not depend on input - {R"({"key": 123})", "123", {"123"}}, - {R"([1, 2, 3])", "123", {"123"}}, - {"1.234", "123", {"123"}}, - {R"("some string")", "123", {"123"}}, - - // Check various ways to define number literal - {"1", "123.4", {"123.4"}}, - {"1", "0.567", {"0.567"}}, - - {"1", "1234e-1", {"123.4"}}, - {"1", "567e-3", {"0.567"}}, - {"1", "123.4e-1", {"12.34"}}, - - {"1", "123e3", {"123000"}}, - {"1", "123e+3", {"123000"}}, - {"1", "1.23e+1", {"12.3"}}, - {"1", "1.23e1", {"12.3"}}, - - {"1", "12e0", {"12"}}, - {"1", "12.3e0", {"12.3"}}, - - {"1", "0", {"0"}}, - {"1", "0.0", {"0"}}, - {"1", "0.0e0", {"0"}}, - - // Check boolean and null literals - {"1", "null", {"null"}}, - {"1", "false", {"false"}}, - {"1", "true", {"true"}}, - - // Check string literals - {"1", "\"string\"", {"\"string\""}}, - {"1", "\" space another space \"", {"\" space another space \""}}, +#include "test_base.h" + +#include <util/string/builder.h> + +#include <cmath> + +class TJsonPathCommonTest : public TJsonPathTestBase { +public: + TJsonPathCommonTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathCommonTest); + UNIT_TEST(TestPrimary); + UNIT_TEST(TestMemberAccess); + UNIT_TEST(TestWildcardMemberAccess); + UNIT_TEST(TestArrayAccess); + UNIT_TEST(TestLastArrayIndex); + UNIT_TEST(TestLastArrayIndexInvalid); + UNIT_TEST(TestNonIntegerArrayIndex); + UNIT_TEST(TestWildcardArrayAccess); + UNIT_TEST(TestUnaryOperations); + UNIT_TEST(TestUnaryOperationsErrors); + UNIT_TEST(TestBinaryArithmeticOperations); + UNIT_TEST(TestBinaryArithmeticOperationsErrors); + UNIT_TEST(TestParseErrors); + UNIT_TEST(TestVariables); + UNIT_TEST(TestDivisionByZero); + UNIT_TEST(TestInfinityResult); + UNIT_TEST(TestLogicalOperations); + UNIT_TEST(TestCompareOperations); + UNIT_TEST(TestFilter); + UNIT_TEST(TestFilterInvalid); + UNIT_TEST(TestNumericMethods); + UNIT_TEST(TestNumericMethodsErrors); + UNIT_TEST(TestDoubleMethod); + 
UNIT_TEST(TestDoubleMethodErrors); + UNIT_TEST(TestTypeMethod); + UNIT_TEST(TestSizeMethod); + UNIT_TEST(TestKeyValueMethod); + UNIT_TEST(TestKeyValueMethodErrors); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST(TestStartsWithPredicateErrors); + UNIT_TEST(TestExistsPredicate); + UNIT_TEST(TestIsUnknownPredicate); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST_SUITE_END(); + + void TestPrimary() { + const TVector<TMultiOutputTestCase> testCases = { + // Context object $ must return whole JSON when used alone + {R"({"key": 123})", "$", {R"({"key":123})"}}, + {R"([1, 2, 3])", "$", {R"([1,2,3])"}}, + {"1.234", "$", {"1.234"}}, + {R"("some string")", "$", {R"("some string")"}}, + + // Literal must not depend on input + {R"({"key": 123})", "123", {"123"}}, + {R"([1, 2, 3])", "123", {"123"}}, + {"1.234", "123", {"123"}}, + {R"("some string")", "123", {"123"}}, + + // Check various ways to define number literal + {"1", "123.4", {"123.4"}}, + {"1", "0.567", {"0.567"}}, + + {"1", "1234e-1", {"123.4"}}, + {"1", "567e-3", {"0.567"}}, + {"1", "123.4e-1", {"12.34"}}, + + {"1", "123e3", {"123000"}}, + {"1", "123e+3", {"123000"}}, + {"1", "1.23e+1", {"12.3"}}, + {"1", "1.23e1", {"12.3"}}, + + {"1", "12e0", {"12"}}, + {"1", "12.3e0", {"12.3"}}, + + {"1", "0", {"0"}}, + {"1", "0.0", {"0"}}, + {"1", "0.0e0", {"0"}}, + + // Check boolean and null literals + {"1", "null", {"null"}}, + {"1", "false", {"false"}}, + {"1", "true", {"true"}}, + + // Check string literals + {"1", "\"string\"", {"\"string\""}}, + {"1", "\" space another space \"", {"\" space another space \""}}, {"1", "\"привет\"", {"\"привет\""}}, // NOTE: escaping is added by library/cpp/json - {"1", "\"\r\n\t\"", {"\"\\r\\n\\t\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestMemberAccess() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({"key": 123, "another_key": 
456})", "$.key", {"123"}}, - {R"({"key": 123, "_another_28_key_$_": 456})", "$._another_28_key_$_", {"456"}}, - {R"({"key": 123, "another_key": 456})", " $.another_key ", {"456"}}, - - {R"({"key": 123, "another_key": 456})", "$.'key'", {"123"}}, - {R"({"key": 123, "_another_28_key_$_": 456})", "$.'_another_28_key_$_'", {"456"}}, - {R"({"key": 123, "another_key": 456})", " $.'another_key' ", {"456"}}, - - {R"({"key": 123, "another_key": 456})", "$.\"key\"", {"123"}}, - {R"({"key": 123, "_another_28_key_$_": 456})", "$.\"_another_28_key_$_\"", {"456"}}, - {R"({"key": 123, "another_key": 456})", " $.\"another_key\" ", {"456"}}, - - {R"({"key": 123, "another key": 456})", "$.'another key'", {"456"}}, - {R"({"key": 123, "another key": 456})", "$.\"another key\"", {"456"}}, - - {R"({"key": 123, "прием отбой": 456})", "$.'прием отбой'", {"456"}}, - {R"({"key": 123, "прием отбой": 456})", "$.\"прием отбой\"", {"456"}}, - - {R"({"key": {"another": 456}})", "$.key.another", {"456"}}, - {R"({"key": {"another key": 456}})", "$.'key'.\"another key\"", {"456"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestWildcardMemberAccess() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "first": 12, - "second": 72 - })", "$.*", {"12", "72"}}, - {R"({ - "friends": { - "Nik": {"age": 18}, - "Kate": {"age": 72} - } - })", "$.friends.*.age", {"72", "18"}}, - {R"({ - "friends": { - "Nik": {"age": 18}, - "Kate": {"age": 72} - } - })", "$.*.*.*", {"72", "18"}}, - {R"({})", "$.*.key", {}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestArrayAccess() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([1, 2, 3])", "$[0]", {"1"}}, - {R"([1, 2, 3, 4, 5, 6])", "$[0 to 2]", {"1", "2", "3"}}, - {R"([1, 2, 
3, 4, 5, 6])", "$[5, 0 to 2, 0, 0, 3 to 5, 2]", {"6", "1", "2", "3", "1", "1", "4", "5", "6", "3"}}, - {R"({ - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - })", "$.friends[1 to 3, 0].age", {"72", "50", "60", "18"}}, - {R"({ - "range": { - "from": 1, - "to": 2 - }, - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - })", "$.friends[$.range.from to $.range.to].age", {"72", "50"}}, - {R"({ - "range": { - "from": [1, 3, 4], - "to": {"key1": 1, "key2": 2, "key3": 3} - }, - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - })", "$.friends[$.range.from[1] to $.range.to.key3].age", {"60"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestLastArrayIndex() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([1, 2, 3])", "$[last]", {"3"}}, - {R"([1, 2, 3])", "$[1 to last]", {"2", "3"}}, - {R"([1, 2, 3])", "$[last to last]", {"3"}}, - {R"([1, 2, 3, 5, 6])", "$[1, last, last, 0, 2 to last, 3]", {"2", "6", "6", "1", "3", "5", "6", "5"}}, - {R"([ - [1, 2, 3, 4], - [5, 6, 7, 8] - ])", "$[*][last]", {"4", "8"}}, - {R"({ - "ranges": [ - {"from": 1, "to": 3}, - {"from": 0, "to": 1} - ], - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, - {R"({ - "ranges": [ - {"from": 1.23, "to": 3.75}, - {"from": 0.58, "to": 1.00001} - ], - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - 
})", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestLastArrayIndexInvalid() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"({})", "last", C(TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestNonIntegerArrayIndex() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"({ - "range": { - "from": [1, 3, 4], - "to": {"key1": 1, "key2": 2, "key3": 3} - }, - "friends": [1, 2, 3] - })", "$.friends[$.range.from[*] to $.range.to.*]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestWildcardArrayAccess() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([1, 2, 3])", "$[*]", {"1", "2", "3"}}, - {R"([[1], [2], [3, 4, 5]])", "$[*][*]", {"1", "2", "3", "4", "5"}}, - {R"({ - "friends": [ - {"name": "Nik", "age": 18}, - {"name": "Kate", "age": 72}, - {"name": "Foma", "age": 50}, - {"name": "Jora", "age": 60} - ] - })", "$.friends[*].age", {"18", "72", "50", "60"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestUnaryOperations() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([])", "-3", {"-3"}}, - {R"([])", "+3", {"3"}}, - {R"(-1)", "-$", {"1"}}, - {R"(-1)", "+$", {"-1"}}, - {R"({ - "range": { - "from": -1, - "to": -2 - }, - "array": [1, 2, 3, 4] - })", 
"$.array[-$.range.from to -$.range.to]", {"2", "3"}}, - {R"({ - "range": { - "from": 1, - "to": -2 - }, - "array": [1, 2, 3, 4] - })", "$.array[+$.range.from to -$.range.to]", {"2", "3"}}, - {R"({ - "range": { - "from": -1, - "to": 2 - }, - "array": [1, 2, 3, 4] - })", "$.array[-$.range.from to +$.range.to]", {"2", "3"}}, - {R"({ - "range": { - "from": 1, - "to": 2 - }, - "array": [1, 2, 3, 4] - })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}}, - {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}}, - {"10000000000000000000000000", "-$", {"-1e+25"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestUnaryOperationsErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"({})", "-$", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, - {R"([1, 2, [], 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, - {R"([1, 2, {}, 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestBinaryArithmeticOperations() { - const TVector<TMultiOutputTestCase> testCases = { - {"[]", "1 + 2", {"3"}}, - {"[]", "1 - 2", {"-1"}}, - {"[]", "10 * 5", {"50"}}, - {"[]", "10 / 5", {"2"}}, - {"[]", "13 % 5", {"3"}}, - - {"[]", "20 * 2 + 5", {"45"}}, - {"[]", "20 / 2 + 5", {"15"}}, - {"[]", "20 % 2 + 5", {"5"}}, - - {"[]", "20 * (2 + 5)", {"140"}}, - {"[]", "20 / (2 + 3)", {"4"}}, - {"[]", "20 % (2 + 5)", {"6"}}, - - {"[]", "5 / 2", {"2.5"}}, - {"[5.24 , 2.62]", "$[0] / $[1]", {"2"}}, - {"[5.24, 2.62]", "$[0] % $[1]", {"0"}}, - {"[3.753, 2.35]", "$[0] % $[1]", {"1.403"}}, - - {"[]", "- 1 + 1", {"0"}}, - {"[]", "+ 1 + 1", {"2"}}, - - {"[1, 2, 3, 4]", "$[last, last-1, last-2, last-3]", {"4", "3", 
"2", "1"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestBinaryArithmeticOperationsErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {"[1, 2, 3]", "$[*] + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, - {"[1, 2, 3]", "1 + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, - {"[1, 2, 3]", "$[*] + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, - - {"[1, 2, 3]", "$ + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, - {"[1, 2, 3]", "1 + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, - {"[1, 2, 3]", "$ + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestParseErrors() { - const TVector<TString> testCases = { - "strict", - "strict smth.key", - "strict $.", - "strict $.$key", - "strict $.28key", - "strict $.ke^y", + {"1", "\"\r\n\t\"", {"\"\\r\\n\\t\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestMemberAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({"key": 123, "another_key": 456})", "$.key", {"123"}}, + {R"({"key": 123, "_another_28_key_$_": 456})", "$._another_28_key_$_", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.another_key ", {"456"}}, + + {R"({"key": 123, "another_key": 456})", "$.'key'", {"123"}}, + {R"({"key": 123, "_another_28_key_$_": 456})", "$.'_another_28_key_$_'", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.'another_key' ", {"456"}}, + + {R"({"key": 123, "another_key": 456})", "$.\"key\"", {"123"}}, + 
{R"({"key": 123, "_another_28_key_$_": 456})", "$.\"_another_28_key_$_\"", {"456"}}, + {R"({"key": 123, "another_key": 456})", " $.\"another_key\" ", {"456"}}, + + {R"({"key": 123, "another key": 456})", "$.'another key'", {"456"}}, + {R"({"key": 123, "another key": 456})", "$.\"another key\"", {"456"}}, + + {R"({"key": 123, "прием отбой": 456})", "$.'прием отбой'", {"456"}}, + {R"({"key": 123, "прием отбой": 456})", "$.\"прием отбой\"", {"456"}}, + + {R"({"key": {"another": 456}})", "$.key.another", {"456"}}, + {R"({"key": {"another key": 456}})", "$.'key'.\"another key\"", {"456"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestWildcardMemberAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "first": 12, + "second": 72 + })", "$.*", {"12", "72"}}, + {R"({ + "friends": { + "Nik": {"age": 18}, + "Kate": {"age": 72} + } + })", "$.friends.*.age", {"72", "18"}}, + {R"({ + "friends": { + "Nik": {"age": 18}, + "Kate": {"age": 72} + } + })", "$.*.*.*", {"72", "18"}}, + {R"({})", "$.*.key", {}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestArrayAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[0]", {"1"}}, + {R"([1, 2, 3, 4, 5, 6])", "$[0 to 2]", {"1", "2", "3"}}, + {R"([1, 2, 3, 4, 5, 6])", "$[5, 0 to 2, 0, 0, 3 to 5, 2]", {"6", "1", "2", "3", "1", "1", "4", "5", "6", "3"}}, + {R"({ + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[1 to 3, 0].age", {"72", "50", "60", "18"}}, + {R"({ + "range": { + "from": 1, + "to": 2 + }, + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + 
{"name": "Jora", "age": 60} + ] + })", "$.friends[$.range.from to $.range.to].age", {"72", "50"}}, + {R"({ + "range": { + "from": [1, 3, 4], + "to": {"key1": 1, "key2": 2, "key3": 3} + }, + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[$.range.from[1] to $.range.to.key3].age", {"60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLastArrayIndex() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[last]", {"3"}}, + {R"([1, 2, 3])", "$[1 to last]", {"2", "3"}}, + {R"([1, 2, 3])", "$[last to last]", {"3"}}, + {R"([1, 2, 3, 5, 6])", "$[1, last, last, 0, 2 to last, 3]", {"2", "6", "6", "1", "3", "5", "6", "5"}}, + {R"([ + [1, 2, 3, 4], + [5, 6, 7, 8] + ])", "$[*][last]", {"4", "8"}}, + {R"({ + "ranges": [ + {"from": 1, "to": 3}, + {"from": 0, "to": 1} + ], + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, + {R"({ + "ranges": [ + {"from": 1.23, "to": 3.75}, + {"from": 0.58, "to": 1.00001} + ], + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[last, $.ranges[last].from to $.ranges[last].to, 2 to last].age", {"60", "18", "72", "50", "60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLastArrayIndexInvalid() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "last", C(TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT)}, + }; + + for 
(const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestNonIntegerArrayIndex() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({ + "range": { + "from": [1, 3, 4], + "to": {"key1": 1, "key2": 2, "key3": 3} + }, + "friends": [1, 2, 3] + })", "$.friends[$.range.from[*] to $.range.to.*]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestWildcardArrayAccess() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2, 3])", "$[*]", {"1", "2", "3"}}, + {R"([[1], [2], [3, 4, 5]])", "$[*][*]", {"1", "2", "3", "4", "5"}}, + {R"({ + "friends": [ + {"name": "Nik", "age": 18}, + {"name": "Kate", "age": 72}, + {"name": "Foma", "age": 50}, + {"name": "Jora", "age": 60} + ] + })", "$.friends[*].age", {"18", "72", "50", "60"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestUnaryOperations() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([])", "-3", {"-3"}}, + {R"([])", "+3", {"3"}}, + {R"(-1)", "-$", {"1"}}, + {R"(-1)", "+$", {"-1"}}, + {R"({ + "range": { + "from": -1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[+$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": -1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to +$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}}, + {R"([1, 2, 3])", "-$[*]", 
{"-1", "-2", "-3"}}, + {"10000000000000000000000000", "-$", {"-1e+25"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestUnaryOperationsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "-$", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + {R"([1, 2, [], 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + {R"([1, 2, {}, 4])", "-$[*]", C(TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestBinaryArithmeticOperations() { + const TVector<TMultiOutputTestCase> testCases = { + {"[]", "1 + 2", {"3"}}, + {"[]", "1 - 2", {"-1"}}, + {"[]", "10 * 5", {"50"}}, + {"[]", "10 / 5", {"2"}}, + {"[]", "13 % 5", {"3"}}, + + {"[]", "20 * 2 + 5", {"45"}}, + {"[]", "20 / 2 + 5", {"15"}}, + {"[]", "20 % 2 + 5", {"5"}}, + + {"[]", "20 * (2 + 5)", {"140"}}, + {"[]", "20 / (2 + 3)", {"4"}}, + {"[]", "20 % (2 + 5)", {"6"}}, + + {"[]", "5 / 2", {"2.5"}}, + {"[5.24 , 2.62]", "$[0] / $[1]", {"2"}}, + {"[5.24, 2.62]", "$[0] % $[1]", {"0"}}, + {"[3.753, 2.35]", "$[0] % $[1]", {"1.403"}}, + + {"[]", "- 1 + 1", {"0"}}, + {"[]", "+ 1 + 1", {"2"}}, + + {"[1, 2, 3, 4]", "$[last, last-1, last-2, last-3]", {"4", "3", "2", "1"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestBinaryArithmeticOperationsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"[1, 2, 3]", "$[*] + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + {"[1, 2, 3]", "1 + $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + {"[1, 2, 3]", "$[*] 
+ $[*]", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT)}, + + {"[1, 2, 3]", "$ + 1", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + {"[1, 2, 3]", "1 + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + {"[1, 2, 3]", "$ + $", C(TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestParseErrors() { + const TVector<TString> testCases = { + "strict", + "strict smth.key", + "strict $.", + "strict $.$key", + "strict $.28key", + "strict $.ke^y", "strict $.привет", "strict $._пока_28_ключ_$_", " strict $.пока ", - "lax", - "lax smth.key", - "lax $.", - "lax $.$key", - "lax $.28key", - "lax $.ke^y", + "lax", + "lax smth.key", + "lax $.", + "lax $.$key", + "lax $.28key", + "lax $.ke^y", "lax $.привет", "lax $._пока_28_ключ_$_", " lax $.пока ", - "12.", - "12..3", - "12.3e", - "12.3e++1", - "12.3e--1", - "1e100000000000000000000000000000000", - "true || false", - "1 && (true == true)", - "!true", - "$[*] ? (@.active) . 
id", - "!(1 > 2).type()", - "(null) is unknown", - "(12 * 12) is unknown", - R"($ like_regex "[[[")", - R"($ like_regex "[0-9]+" flag "x")", - "$.first fjrfrfq fqijrhfqiwrjhfqrf qrfqr", - }; - - for (const auto& testCase : testCases) { - RunParseErrorTestCase(testCase); - } - } - - void TestVariables() { - TVector<TVariablesTestCase> testCases = { - {"123", {{"var", "456"}}, "$ + $var", {"579"}}, - {"123", {{"var", "456"}}, "$var", {"456"}}, - {"123", {{"var", R"({"key": [1, 2, 3, 4, 5]})"}}, "$var.key[2 to last]", {"3", "4", "5"}}, - {"123", {{"to", "1"}, {"strict", "2"}}, "$to + $strict", {"3"}}, - }; - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunVariablesTestCase(testCase.Json, testCase.Variables, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestDivisionByZero() { - const TVector<TRuntimeErrorTestCase> testCases = { - {"0", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, - {"0.00000000000000000001", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestInfinityResult() { - const double step = 1000000000; - double current = step; - TStringBuilder literal; - TStringBuilder query; - literal << '"' << step; - query << step; - while (!std::isinf(current)) { - query << " * " << step; - literal << "000000000"; - current *= step; - } - literal << '"'; - - const TVector<TRuntimeErrorTestCase> testCases = { - {"0", TString(query), C(TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY)}, - {TString(literal), "$.double()", C(TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestLogicalOperations() { - const 
TVector<TMultiOutputTestCase> testCases = { - // JsonPath does not allow to use boolean literals in boolean operators. - // Here we use their replacements: - // 1. "(1 < true)" for "null" - // 2. "(true == true)" for "true" - // 3. "(true != true)" for "false" - {"1", "(1 < true) || (1 < true)", {"null"}}, - {"1", "(1 < true) || (true != true)", {"null"}}, - {"1", "(1 < true) || (true == true)", {"true"}}, - {"1", "(true != true) || (1 < true)", {"null"}}, - {"1", "(true != true) || (true != true)", {"false"}}, - {"1", "(true != true) || (true == true)", {"true"}}, - {"1", "(true == true) || (1 < true)", {"true"}}, - {"1", "(true == true) || (true != true)", {"true"}}, - {"1", "(true == true) || (true == true)", {"true"}}, - - {"1", "(1 < true) && (1 < true)", {"null"}}, - {"1", "(1 < true) && (true != true)", {"false"}}, - {"1", "(1 < true) && (true == true)", {"null"}}, - {"1", "(true != true) && (1 < true)", {"false"}}, - {"1", "(true != true) && (true != true)", {"false"}}, - {"1", "(true != true) && (true == true)", {"false"}}, - {"1", "(true == true) && (1 < true)", {"null"}}, - {"1", "(true == true) && (true != true)", {"false"}}, - {"1", "(true == true) && (true == true)", {"true"}}, - - {"1", "(true != true) && (true != true) || (true == true)", {"true"}}, - {"1", "(true != true) && ((true != true) || (true == true))", {"false"}}, - {"1", "(true != true) || (true != true) || (true == true)", {"true"}}, - {"1", "(true == true) && (true == true) && (true == true) && (true != true)", {"false"}}, - - {"1", "!(1 < true)", {"null"}}, - {"1", "!(true != true)", {"true"}}, - {"1", "!(true == true)", {"false"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestCompareOperations() { - const TVector<TString> operations = {"==", "<", "<=", ">", ">=", "!=", "<>"}; - // All compare operations between null and non-null operands are 
false - for (const auto& op : operations) { - RunTestCase("1", TStringBuilder() << "null " << op << " 1", {"false"}); - RunTestCase("1", TStringBuilder() << "1 " << op << " null", {"false"}); - } - - // If one of the operands is not scalar, comparison results to null - for (const auto& op : operations) { - RunTestCase("[[]]", TStringBuilder() << "$ " << op << " 1", {"null"}); - RunTestCase("[[]]", TStringBuilder() << "1 " << op << " $", {"null"}); - RunTestCase("[[]]", TStringBuilder() << "$ " << op << " $", {"null"}); - - RunTestCase("{}", TStringBuilder() << "$ " << op << " 1", {"null"}); - RunTestCase("{}", TStringBuilder() << "1 " << op << " $", {"null"}); - RunTestCase("{}", TStringBuilder() << "$ " << op << " $", {"null"}); - } - - // If both operands are null, only == is true - for (const auto& op : operations) { - const TString result = op == "==" ? "true" : "false"; - RunTestCase("1", TStringBuilder() << "null " << op << " null", {result}); - } - - const TVector<TMultiOutputTestCase> testCases = { - // Check comparison of numbers - {"1", "1.23 < 4.56", {"true"}}, - {"1", "1.23 > 4.56", {"false"}}, - {"1", "1.23 <= 4.56", {"true"}}, - {"1", "1.23 >= 4.56", {"false"}}, - {"1", "1.23 == 1.23", {"true"}}, - {"1", "1.23 != 1.23", {"false"}}, - {"1", "1.23 <> 4.56", {"true"}}, - {"1", "1.00000000000000000001 == 1.00000000000000000002", {"true"}}, - - // Check numbers of different kinds (int64 vs double) - {"1", "1 < 2.33", {"true"}}, - {"1", "1 > 4.56", {"false"}}, - {"1", "1 <= 4.56", {"true"}}, - {"1", "1 >= 4.56", {"false"}}, - {"1", "1 == 1.23", {"false"}}, - {"1", "1 != 1.23", {"true"}}, - {"1", "1 <> 4.56", {"true"}}, - - // Check comparison of strings - {"1", R"("abc" < "def")", {"true"}}, - {"1", R"("abc" > "def")", {"false"}}, - {"1", R"("abc" <= "def")", {"true"}}, - {"1", R"("abc" >= "def")", {"false"}}, - {"1", R"("abc" == "abc")", {"true"}}, - {"1", R"("abc" != "abc")", {"false"}}, - {"1", R"("abc" <> "def")", {"true"}}, - - // Check comparison of 
UTF8 strings - // First string is U+00e9 (LATIN SMALL LETTER E WITH ACUTE), "é" - // Second string is U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), "é" + "12.", + "12..3", + "12.3e", + "12.3e++1", + "12.3e--1", + "1e100000000000000000000000000000000", + "true || false", + "1 && (true == true)", + "!true", + "$[*] ? (@.active) . id", + "!(1 > 2).type()", + "(null) is unknown", + "(12 * 12) is unknown", + R"($ like_regex "[[[")", + R"($ like_regex "[0-9]+" flag "x")", + "$.first fjrfrfq fqijrhfqiwrjhfqrf qrfqr", + }; + + for (const auto& testCase : testCases) { + RunParseErrorTestCase(testCase); + } + } + + void TestVariables() { + TVector<TVariablesTestCase> testCases = { + {"123", {{"var", "456"}}, "$ + $var", {"579"}}, + {"123", {{"var", "456"}}, "$var", {"456"}}, + {"123", {{"var", R"({"key": [1, 2, 3, 4, 5]})"}}, "$var.key[2 to last]", {"3", "4", "5"}}, + {"123", {{"to", "1"}, {"strict", "2"}}, "$to + $strict", {"3"}}, + }; + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunVariablesTestCase(testCase.Json, testCase.Variables, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDivisionByZero() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"0", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, + {"0.00000000000000000001", "1 / $", C(TIssuesIds::JSONPATH_DIVISION_BY_ZERO)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestInfinityResult() { + const double step = 1000000000; + double current = step; + TStringBuilder literal; + TStringBuilder query; + literal << '"' << step; + query << step; + while (!std::isinf(current)) { + query << " * " << step; + literal << "000000000"; + current *= step; + } + literal << '"'; + + const TVector<TRuntimeErrorTestCase> testCases = { + {"0", TString(query), 
C(TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY)}, + {TString(literal), "$.double()", C(TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestLogicalOperations() { + const TVector<TMultiOutputTestCase> testCases = { + // JsonPath does not allow to use boolean literals in boolean operators. + // Here we use their replacements: + // 1. "(1 < true)" for "null" + // 2. "(true == true)" for "true" + // 3. "(true != true)" for "false" + {"1", "(1 < true) || (1 < true)", {"null"}}, + {"1", "(1 < true) || (true != true)", {"null"}}, + {"1", "(1 < true) || (true == true)", {"true"}}, + {"1", "(true != true) || (1 < true)", {"null"}}, + {"1", "(true != true) || (true != true)", {"false"}}, + {"1", "(true != true) || (true == true)", {"true"}}, + {"1", "(true == true) || (1 < true)", {"true"}}, + {"1", "(true == true) || (true != true)", {"true"}}, + {"1", "(true == true) || (true == true)", {"true"}}, + + {"1", "(1 < true) && (1 < true)", {"null"}}, + {"1", "(1 < true) && (true != true)", {"false"}}, + {"1", "(1 < true) && (true == true)", {"null"}}, + {"1", "(true != true) && (1 < true)", {"false"}}, + {"1", "(true != true) && (true != true)", {"false"}}, + {"1", "(true != true) && (true == true)", {"false"}}, + {"1", "(true == true) && (1 < true)", {"null"}}, + {"1", "(true == true) && (true != true)", {"false"}}, + {"1", "(true == true) && (true == true)", {"true"}}, + + {"1", "(true != true) && (true != true) || (true == true)", {"true"}}, + {"1", "(true != true) && ((true != true) || (true == true))", {"false"}}, + {"1", "(true != true) || (true != true) || (true == true)", {"true"}}, + {"1", "(true == true) && (true == true) && (true == true) && (true != true)", {"false"}}, + + {"1", "!(1 < true)", {"null"}}, + {"1", "!(true != true)", {"true"}}, + {"1", "!(true == true)", 
{"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestCompareOperations() { + const TVector<TString> operations = {"==", "<", "<=", ">", ">=", "!=", "<>"}; + // All compare operations between null and non-null operands are false + for (const auto& op : operations) { + RunTestCase("1", TStringBuilder() << "null " << op << " 1", {"false"}); + RunTestCase("1", TStringBuilder() << "1 " << op << " null", {"false"}); + } + + // If one of the operands is not scalar, comparison results to null + for (const auto& op : operations) { + RunTestCase("[[]]", TStringBuilder() << "$ " << op << " 1", {"null"}); + RunTestCase("[[]]", TStringBuilder() << "1 " << op << " $", {"null"}); + RunTestCase("[[]]", TStringBuilder() << "$ " << op << " $", {"null"}); + + RunTestCase("{}", TStringBuilder() << "$ " << op << " 1", {"null"}); + RunTestCase("{}", TStringBuilder() << "1 " << op << " $", {"null"}); + RunTestCase("{}", TStringBuilder() << "$ " << op << " $", {"null"}); + } + + // If both operands are null, only == is true + for (const auto& op : operations) { + const TString result = op == "==" ? 
"true" : "false"; + RunTestCase("1", TStringBuilder() << "null " << op << " null", {result}); + } + + const TVector<TMultiOutputTestCase> testCases = { + // Check comparison of numbers + {"1", "1.23 < 4.56", {"true"}}, + {"1", "1.23 > 4.56", {"false"}}, + {"1", "1.23 <= 4.56", {"true"}}, + {"1", "1.23 >= 4.56", {"false"}}, + {"1", "1.23 == 1.23", {"true"}}, + {"1", "1.23 != 1.23", {"false"}}, + {"1", "1.23 <> 4.56", {"true"}}, + {"1", "1.00000000000000000001 == 1.00000000000000000002", {"true"}}, + + // Check numbers of different kinds (int64 vs double) + {"1", "1 < 2.33", {"true"}}, + {"1", "1 > 4.56", {"false"}}, + {"1", "1 <= 4.56", {"true"}}, + {"1", "1 >= 4.56", {"false"}}, + {"1", "1 == 1.23", {"false"}}, + {"1", "1 != 1.23", {"true"}}, + {"1", "1 <> 4.56", {"true"}}, + + // Check comparison of strings + {"1", R"("abc" < "def")", {"true"}}, + {"1", R"("abc" > "def")", {"false"}}, + {"1", R"("abc" <= "def")", {"true"}}, + {"1", R"("abc" >= "def")", {"false"}}, + {"1", R"("abc" == "abc")", {"true"}}, + {"1", R"("abc" != "abc")", {"false"}}, + {"1", R"("abc" <> "def")", {"true"}}, + + // Check comparison of UTF8 strings + // First string is U+00e9 (LATIN SMALL LETTER E WITH ACUTE), "é" + // Second string is U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), "é" {"1", R"("é" < "é")", {"false"}}, {"1", R"("é" > "é")", {"true"}}, {"1", R"("привет" == "привет")", {"true"}}, - - // Check cross-product comparison - {R"({ - "left": [1], - "right": [4, 5, 6] - })", "$.left[*] < $.right[*]", {"true"}}, - {R"({ - "left": [4, 5, 6], - "right": [1] - })", "$.left[*] < $.right[*]", {"false"}}, - {R"({ - "left": [1, 2, 3], - "right": [4, 5, 6] - })", "$.left[*] < $.right[*]", {"true"}}, - {R"({ - "left": [10, 30, 40], - "right": [1, 2, 15] - })", "$.left[*] < $.right[*]", {"true"}}, - {R"({ - "left": [10, 30, 40], - "right": [1, 2, 3] - })", "$.left[*] < $.right[*]", {"false"}}, - - // Check incomparable types - {"1", "1 < true", {"null"}}, - {"1", R"(true <> 
"def")", {"null"}}, - - // Check error in arguments - {R"({ - "array": [1, 2, 3, 4, 5], - "invalid_index": { - "key": 1 - } - })", "$.array[$.invalid_index] < 3", {"null"}}, - {R"({ - "array": [1, 2, 3, 4, 5], - "invalid_index": { - "key": 1 - } - })", "5 >= $.array[$.invalid_index]", {"null"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestFilter() { - const TVector<TMultiOutputTestCase> testCases = { - {"[1, 2, 3]", "$[*] ? (@ > 2)", {"3"}}, - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$[*] ? (@.age >= 18)", {R"({"age":18})", R"({"age":25})", R"({"age":50})"}}, - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$[*] ? (@.age >= 18) ? (@.age <= 30)", {R"({"age":18})", R"({"age":25})"}}, - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$[*] ? (@.age >= 18) ? (@.age <= 30) . age", {"18", "25"}}, - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$[*] ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$[*] ? (@.age >= 18 || @.age <= 30) . age", {"18", "25", "50", "5"}}, - {R"([ - { - "id": 1, - "is_valid": false, - "days_till_doom": 11, - "age_estimation": 4 - }, - { - "id": 2, - "is_valid": true, - "days_till_doom": 5, - "age_estimation": 3 - }, - { - "id": 3, - "is_valid": true, - "days_till_doom": 20, - "age_estimation": 10 - }, - { - "id": 4, - "is_valid": true, - "days_till_doom": 30, - "age_estimation": 2 - } - ])", "$[*] ? 
(@.is_valid == true && @.days_till_doom > 10 && 2 * @.age_estimation <= 12).id", {"4"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestFilterInvalid() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"({})", "@", C(TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestNumericMethods() { - const TVector<TMultiOutputTestCase> testCases = { - {"[-1.23, 4.56, 3, 0]", "$[*].abs()", {"1.23", "4.56", "3", "0"}}, - {"[-1.23, 4.56, 3, 0]", "$[*].floor()", {"-2", "4", "3", "0"}}, - {"[-1.23, 4.56, 3, 0]", "$[*].ceiling()", {"-1", "5", "3", "0"}}, - {"-123.45", "$.ceiling().abs().floor()", {"123"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestNumericMethodsErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"(["1", true, null])", "$[*].abs()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, - {R"(["1", true, null])", "$[*].floor()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, - {R"(["1", true, null])", "$[*].ceiling()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestDoubleMethod() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([ - "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", - "123e3", "123e+3", "1.23e+1", "1.23e1", - "12e0", "12.3e0", "0", "0.0", "0.0e0" - ])", "$[*].double()", { - "123", "123.4", "0.567", "123.4", "0.567", 
"12.34", - "123000", "123000", "12.3", "12.3", - "12", "12.3", "0", "0", "0", - }}, - {R"("-123.45e1")", "$.double().abs().floor()", {"1234"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestDoubleMethodErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"(["1", true, null])", "$[*].double()", C(TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT)}, - {R"("hi stranger")", "$.double()", C(TIssuesIds::JSONPATH_INVALID_NUMBER_STRING)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestTypeMethod() { - const TVector<TMultiOutputTestCase> testCases = { - {"null", "$.type()", {"\"null\""}}, - {"true", "$.type()", {"\"boolean\""}}, - {"false", "$.type()", {"\"boolean\""}}, - {"1", "$.type()", {"\"number\""}}, - {"-1", "$.type()", {"\"number\""}}, - {"4.56", "$.type()", {"\"number\""}}, - {"-4.56", "$.type()", {"\"number\""}}, - {"\"some string\"", "$.type()", {"\"string\""}}, - {"[]", "$.type()", {"\"array\""}}, - {"[1, 2, 3, 4]", "$.type()", {"\"array\""}}, - {"{}", "$.type()", {"\"object\""}}, - {"{\"key\": 123}", "$.type()", {"\"object\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestSizeMethod() { - const TVector<TMultiOutputTestCase> testCases = { - {"null", "$.size()", {"1"}}, - {"true", "$.size()", {"1"}}, - {"false", "$.size()", {"1"}}, - {"1", "$.size()", {"1"}}, - {"-1", "$.size()", {"1"}}, - {"4.56", "$.size()", {"1"}}, - {"-4.56", "$.size()", {"1"}}, - {"\"some string\"", "$.size()", {"1"}}, - {"[]", "$.size()", {"0"}}, - {"[1, 2, 3, 4]", "$.size()", {"4"}}, - {"{}", "$.size()", {"1"}}, - {"{\"key\": 123}", "$.size()", 
{"1"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestKeyValueMethod() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "one": 1, - "two": 2, - "three": 3 - })", "$.keyvalue()", { - R"({"name":"one","value":1})", - R"({"name":"three","value":3})", - R"({"name":"two","value":2})", - }}, - {R"({ - "one": "string", - "two": [1, 2, 3, 4], - "three": [4, 5] - })", R"($.keyvalue() ? (@.value.type() == "array" && @.value.size() > 2).name)", {"\"two\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestKeyValueMethodErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {"\"string\"", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, - {"[1, 2, 3, 4]", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestStartsWithPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {"1", R"("some string" starts with "some")", {"true"}}, - {"1", R"("some string" starts with "string")", {"false"}}, - {R"(["some string", "string"])", R"($[*] ? 
(@ starts with "string"))", {"\"string\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestStartsWithPredicateErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"(["first", "second"])", R"($[*] starts with "first")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, - {"1", R"(1 starts with "string")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestExistsPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "key": 123 - })", "exists ($.key)", {"true"}}, - {"\"string\"", "exists ($ * 2)", {"null"}}, - {R"(["some string", 2])", "$[*] ? (exists (@ * 2))", {"2"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestIsUnknownPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {"1", "(1 < true) is unknown", {"true"}}, - {"1", "(true == true) is unknown", {"false"}}, - {"1", "(true == false) is unknown", {"false"}}, - {R"(["some string", -20])", "$[*] ? 
((1 < @) is unknown)", {"\"some string\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestLikeRegexPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"(["string", "123", "456"])", R"($[*] like_regex "[0-9]+")", {"true"}}, - {R"(["string", "another string"])", R"($[*] like_regex "[0-9]+")", {"false"}}, - - // Case insensitive flag - {R"("AbCd")", R"($ like_regex "abcd")", {"false"}}, - {R"("AbCd")", R"($ like_regex "abcd" flag "i")", {"true"}}, - - {R"(["string", "123", "456"])", R"($[*] ? (@ like_regex "[0-9]+"))", {"\"123\"", "\"456\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : ALL_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TJsonPathCommonTest); + + // Check cross-product comparison + {R"({ + "left": [1], + "right": [4, 5, 6] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [4, 5, 6], + "right": [1] + })", "$.left[*] < $.right[*]", {"false"}}, + {R"({ + "left": [1, 2, 3], + "right": [4, 5, 6] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [10, 30, 40], + "right": [1, 2, 15] + })", "$.left[*] < $.right[*]", {"true"}}, + {R"({ + "left": [10, 30, 40], + "right": [1, 2, 3] + })", "$.left[*] < $.right[*]", {"false"}}, + + // Check incomparable types + {"1", "1 < true", {"null"}}, + {"1", R"(true <> "def")", {"null"}}, + + // Check error in arguments + {R"({ + "array": [1, 2, 3, 4, 5], + "invalid_index": { + "key": 1 + } + })", "$.array[$.invalid_index] < 3", {"null"}}, + {R"({ + "array": [1, 2, 3, 4, 5], + "invalid_index": { + "key": 1 + } + })", "5 >= $.array[$.invalid_index]", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + 
void TestFilter() { + const TVector<TMultiOutputTestCase> testCases = { + {"[1, 2, 3]", "$[*] ? (@ > 2)", {"3"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18)", {R"({"age":18})", R"({"age":25})", R"({"age":50})"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18) ? (@.age <= 30)", {R"({"age":18})", R"({"age":25})"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18) ? (@.age <= 30) . age", {"18", "25"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$[*] ? (@.age >= 18 || @.age <= 30) . age", {"18", "25", "50", "5"}}, + {R"([ + { + "id": 1, + "is_valid": false, + "days_till_doom": 11, + "age_estimation": 4 + }, + { + "id": 2, + "is_valid": true, + "days_till_doom": 5, + "age_estimation": 3 + }, + { + "id": 3, + "is_valid": true, + "days_till_doom": 20, + "age_estimation": 10 + }, + { + "id": 4, + "is_valid": true, + "days_till_doom": 30, + "age_estimation": 2 + } + ])", "$[*] ? 
(@.is_valid == true && @.days_till_doom > 10 && 2 * @.age_estimation <= 12).id", {"4"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestFilterInvalid() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"({})", "@", C(TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestNumericMethods() { + const TVector<TMultiOutputTestCase> testCases = { + {"[-1.23, 4.56, 3, 0]", "$[*].abs()", {"1.23", "4.56", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$[*].floor()", {"-2", "4", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$[*].ceiling()", {"-1", "5", "3", "0"}}, + {"-123.45", "$.ceiling().abs().floor()", {"123"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestNumericMethodsErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["1", true, null])", "$[*].abs()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + {R"(["1", true, null])", "$[*].floor()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + {R"(["1", true, null])", "$[*].ceiling()", C(TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestDoubleMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([ + "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", + "123e3", "123e+3", "1.23e+1", "1.23e1", + "12e0", "12.3e0", "0", "0.0", "0.0e0" + ])", "$[*].double()", { + "123", "123.4", "0.567", "123.4", "0.567", 
"12.34", + "123000", "123000", "12.3", "12.3", + "12", "12.3", "0", "0", "0", + }}, + {R"("-123.45e1")", "$.double().abs().floor()", {"1234"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDoubleMethodErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["1", true, null])", "$[*].double()", C(TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT)}, + {R"("hi stranger")", "$.double()", C(TIssuesIds::JSONPATH_INVALID_NUMBER_STRING)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestTypeMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {"null", "$.type()", {"\"null\""}}, + {"true", "$.type()", {"\"boolean\""}}, + {"false", "$.type()", {"\"boolean\""}}, + {"1", "$.type()", {"\"number\""}}, + {"-1", "$.type()", {"\"number\""}}, + {"4.56", "$.type()", {"\"number\""}}, + {"-4.56", "$.type()", {"\"number\""}}, + {"\"some string\"", "$.type()", {"\"string\""}}, + {"[]", "$.type()", {"\"array\""}}, + {"[1, 2, 3, 4]", "$.type()", {"\"array\""}}, + {"{}", "$.type()", {"\"object\""}}, + {"{\"key\": 123}", "$.type()", {"\"object\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestSizeMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {"null", "$.size()", {"1"}}, + {"true", "$.size()", {"1"}}, + {"false", "$.size()", {"1"}}, + {"1", "$.size()", {"1"}}, + {"-1", "$.size()", {"1"}}, + {"4.56", "$.size()", {"1"}}, + {"-4.56", "$.size()", {"1"}}, + {"\"some string\"", "$.size()", {"1"}}, + {"[]", "$.size()", {"0"}}, + {"[1, 2, 3, 4]", "$.size()", {"4"}}, + {"{}", "$.size()", {"1"}}, + {"{\"key\": 123}", "$.size()", 
{"1"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethod() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "one": 1, + "two": 2, + "three": 3 + })", "$.keyvalue()", { + R"({"name":"one","value":1})", + R"({"name":"three","value":3})", + R"({"name":"two","value":2})", + }}, + {R"({ + "one": "string", + "two": [1, 2, 3, 4], + "three": [4, 5] + })", R"($.keyvalue() ? (@.value.type() == "array" && @.value.size() > 2).name)", {"\"two\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethodErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {"\"string\"", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, + {"[1, 2, 3, 4]", "$.keyvalue()", C(TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {"1", R"("some string" starts with "some")", {"true"}}, + {"1", R"("some string" starts with "string")", {"false"}}, + {R"(["some string", "string"])", R"($[*] ? 
(@ starts with "string"))", {"\"string\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicateErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"(["first", "second"])", R"($[*] starts with "first")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, + {"1", R"(1 starts with "string")", C(TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT)}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestExistsPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "key": 123 + })", "exists ($.key)", {"true"}}, + {"\"string\"", "exists ($ * 2)", {"null"}}, + {R"(["some string", 2])", "$[*] ? (exists (@ * 2))", {"2"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestIsUnknownPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {"1", "(1 < true) is unknown", {"true"}}, + {"1", "(true == true) is unknown", {"false"}}, + {"1", "(true == false) is unknown", {"false"}}, + {R"(["some string", -20])", "$[*] ? 
((1 < @) is unknown)", {"\"some string\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["string", "123", "456"])", R"($[*] like_regex "[0-9]+")", {"true"}}, + {R"(["string", "another string"])", R"($[*] like_regex "[0-9]+")", {"false"}}, + + // Case insensitive flag + {R"("AbCd")", R"($ like_regex "abcd")", {"false"}}, + {R"("AbCd")", R"($ like_regex "abcd" flag "i")", {"true"}}, + + {R"(["string", "123", "456"])", R"($[*] ? (@ like_regex "[0-9]+"))", {"\"123\"", "\"456\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathCommonTest); diff --git a/ydb/library/yql/minikql/jsonpath/ut/examples_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/examples_ut.cpp index 3b964e28b5..78856b86b4 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/examples_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/examples_ut.cpp @@ -1,81 +1,81 @@ -#include "test_base.h" - -/* - These examples are taken from [ISO/IEC TR 19075-6:2017] standard (https://www.iso.org/standard/67367.html) -*/ - -class TJsonPathExamplesTest : public TJsonPathTestBase { -public: - TJsonPathExamplesTest() - : TJsonPathTestBase() - { - } - - UNIT_TEST_SUITE(TJsonPathExamplesTest); - UNIT_TEST(TestMemberAccessExamples); - UNIT_TEST(TestElementAccessExamples); - UNIT_TEST(TestFilterExamples); - UNIT_TEST_SUITE_END(); - - void TestMemberAccessExamples() { - TString input = R"({ - "phones": [ - {"type": "cell", "number": "abc-defg"}, - {"number": "pqr-wxyz"}, - {"type": "home", "number": "hij-klmn"} - ] - })"; - - RunTestCase(input, "lax $.phones.type", {"\"cell\"", "\"home\""}); - RunRuntimeErrorTestCase(input, "strict 
$.phones[*].type", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)); - // NOTE: Example in standard has different order of elements. This is okay because order of elements after - // wildcard member access is implementation-defined - RunTestCase(input, "lax $.phones.*", {"\"abc-defg\"", "\"cell\"", "\"pqr-wxyz\"", "\"hij-klmn\"", "\"home\""}); - } - - void TestElementAccessExamples() { - // NOTE: Example in standard has different order of elements. This is okay because order of elements after - // wildcard member access is implementation-defined - RunTestCase(R"({ - "sensors": { - "SF": [10, 11, 12, 13, 15, 16, 17], - "FC": [20, 22, 24], - "SJ": [30, 33] - } - })", "lax $.sensors.*[0, last, 2]", {"20", "24", "24", "10", "17", "12", "30", "33"}); - - RunTestCase(R"({ - "x": [12, 30], - "y": [8], - "z": ["a", "b", "c"] - })", "lax $.*[1 to last]", {"30", "\"b\"", "\"c\""}); - } - - void TestFilterExamples() { - RunParseErrorTestCase("$ ? (@.skilled)"); - - TString json = R"({"name":"Portia","skilled":true})"; - RunTestCase(json, "$ ? (@.skilled == true)", {json}); - - // Standard also mentions this example in lax mode. It is invalid because - // in this case automatic unwrapping on arrays before filters will be performed - // and query will finish with error - RunTestCase(R"({ - "x": [1, "one"] - })", "strict $.x ? (2 > @[*])", {}); - - RunTestCase(R"({ - "name": { - "first": "Manny", - "last": "Moe" - }, - "points": 123 - })", "strict $ ? (exists (@.name)).name", {R"({"first":"Manny","last":"Moe"})"}); - - RunTestCase(R"({ - "points": 41 - })", "strict $ ? (exists (@.name)).name", {}); - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TJsonPathExamplesTest);
\ No newline at end of file +#include "test_base.h" + +/* + These examples are taken from [ISO/IEC TR 19075-6:2017] standard (https://www.iso.org/standard/67367.html) +*/ + +class TJsonPathExamplesTest : public TJsonPathTestBase { +public: + TJsonPathExamplesTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathExamplesTest); + UNIT_TEST(TestMemberAccessExamples); + UNIT_TEST(TestElementAccessExamples); + UNIT_TEST(TestFilterExamples); + UNIT_TEST_SUITE_END(); + + void TestMemberAccessExamples() { + TString input = R"({ + "phones": [ + {"type": "cell", "number": "abc-defg"}, + {"number": "pqr-wxyz"}, + {"type": "home", "number": "hij-klmn"} + ] + })"; + + RunTestCase(input, "lax $.phones.type", {"\"cell\"", "\"home\""}); + RunRuntimeErrorTestCase(input, "strict $.phones[*].type", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)); + // NOTE: Example in standard has different order of elements. This is okay because order of elements after + // wildcard member access is implementation-defined + RunTestCase(input, "lax $.phones.*", {"\"abc-defg\"", "\"cell\"", "\"pqr-wxyz\"", "\"hij-klmn\"", "\"home\""}); + } + + void TestElementAccessExamples() { + // NOTE: Example in standard has different order of elements. This is okay because order of elements after + // wildcard member access is implementation-defined + RunTestCase(R"({ + "sensors": { + "SF": [10, 11, 12, 13, 15, 16, 17], + "FC": [20, 22, 24], + "SJ": [30, 33] + } + })", "lax $.sensors.*[0, last, 2]", {"20", "24", "24", "10", "17", "12", "30", "33"}); + + RunTestCase(R"({ + "x": [12, 30], + "y": [8], + "z": ["a", "b", "c"] + })", "lax $.*[1 to last]", {"30", "\"b\"", "\"c\""}); + } + + void TestFilterExamples() { + RunParseErrorTestCase("$ ? (@.skilled)"); + + TString json = R"({"name":"Portia","skilled":true})"; + RunTestCase(json, "$ ? (@.skilled == true)", {json}); + + // Standard also mentions this example in lax mode. 
It is invalid because + // in this case automatic unwrapping on arrays before filters will be performed + // and query will finish with error + RunTestCase(R"({ + "x": [1, "one"] + })", "strict $.x ? (2 > @[*])", {}); + + RunTestCase(R"({ + "name": { + "first": "Manny", + "last": "Moe" + }, + "points": 123 + })", "strict $ ? (exists (@.name)).name", {R"({"first":"Manny","last":"Moe"})"}); + + RunTestCase(R"({ + "points": 41 + })", "strict $ ? (exists (@.name)).name", {}); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathExamplesTest);
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/ut/lax_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/lax_ut.cpp index 4d5dda83ac..5efd95a2c4 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/lax_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/lax_ut.cpp @@ -1,283 +1,283 @@ -#include "test_base.h" - -class TJsonPathLaxTest : public TJsonPathTestBase { -public: - TJsonPathLaxTest() - : TJsonPathTestBase() - { - } - - UNIT_TEST_SUITE(TJsonPathLaxTest); - UNIT_TEST(TestArrayUnwrap); - UNIT_TEST(TestArrayWrap); - UNIT_TEST(TestInvalidArrayIndices); - UNIT_TEST(TestStructuralErrorsHandling); - UNIT_TEST(TestCompareOperations); - UNIT_TEST(TestFilter); - UNIT_TEST(TestNumericMethods); - UNIT_TEST(TestDoubleMethod); - UNIT_TEST(TestKeyValueMethod); - UNIT_TEST(TestExistsPredicate); - UNIT_TEST(TestLikeRegexPredicate); - UNIT_TEST(TestStartsWithPredicate); - UNIT_TEST_SUITE_END(); - - void TestArrayUnwrap() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([ - {"key": 1}, - {"key": 2} - ])", "$.key", {"1", "2"}}, - {R"([ - {"key": 1}, - {"key": 2} - ])", "$.*", {"1", "2"}}, - {R"({ - "first": {"key": 1}, - "second": [] - })", "$.*.key", {"1"}}, - {R"({ - "first": {"key": 1}, - "second": [] - })", "$.*.*", {"1"}}, - {R"({"another_key": 123})", "$.key", {}}, - {R"([ - {"key": [{"nested": 28}]}, - {"key": [{"nested": 29}]} - ])", "$.key.nested", {"28", "29"}}, - {R"([ - {"key": [{"nested": 28}]}, - {"key": [{"nested": 29}]} - ])", "$.*.*", {"28", "29"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestArrayWrap() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([1, 2])", "$[*][0]", {"1", "2"}}, - {R"([[1], 2, [3]])", "$[*][0]", {"1", "2", "3"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, 
testCase.Result); - } - } - } - - void TestInvalidArrayIndices() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "idx": -1, - "array": [1, 2, 3] - })", "$.array[$.idx]", {}}, - {R"({ - "from": -1, - "to": 3, - "array": [1, 2, 3] - })", "$.array[$.from to $.to]", {}}, - {R"({ - "from": 0, - "to": -1, - "array": [1, 2, 3] - })", "$.array[$.from to $.to]", {}}, - {R"([1, 2, 3, 4, 5])", "$[3 to 0]", {}}, - {R"({ - "idx": -1, - "array": [1, 2, 3] - })", "$.array[$.idx, 1 to 2]", {"2", "3"}}, - {R"({ - "from": -1, - "to": 3, - "array": [1, 2, 3] - })", "$.array[0, $.from to $.to, 2 to 2]", {"1", "3"}}, - {R"({ - "from": 0, - "to": -1, - "array": [1, 2, 3] - })", "$.array[0, $.from to $.to, 1 to 1]", {"1", "2"}}, - {R"([1, 2, 3, 4, 5])", "$[0, 3 to 0, 1]", {"1", "2"}}, - {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", {"5"}}, - {"[]", "$[last]", {}}, - {"[]", "$[last to 0]", {}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestStructuralErrorsHandling() { - const TVector<TMultiOutputTestCase> testCases = { - {R"([[{"key": 1}]])", "$.key", {}}, - {R"([[{"key": 1}]])", "$.*", {}}, - {R"([ - {"key": 1}, - {"not_key": 2}, - {"key": 3} - ])", "$[*].key", {"1", "3"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestCompareOperations() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrap - {R"({ - "left": [1, 2, 3], - "right": [4, 5, 6] - })", "$.left < $.right", {"true"}}, - // Check incomparable types - // NOTE: Even though values of types string and number are incomparable, - // pair 1 < 4 is true and was found first, so the overall result is true - {R"({ - "left": [1, 2, "string"], - "right": [4, 5, 6] - })", "$.left < $.right", {"true"}}, - // NOTE: In this example pair 
"string" < 4 results in error and was found first, - // so overall result is null - {R"({ - "left": ["string", 2, 3], - "right": [4, 5, 6] - })", "$.left < $.right", {"null"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestFilter() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrap - {R"([ - {"age": 18}, - {"age": 25}, - {"age": 50}, - {"age": 5} - ])", "$ ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestNumericMethods() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrap - {"[-1.23, 4.56, 3, 0]", "$.abs()", {"1.23", "4.56", "3", "0"}}, - {"[-1.23, 4.56, 3, 0]", "$.floor()", {"-2", "4", "3", "0"}}, - {"[-1.23, 4.56, 3, 0]", "$.ceiling()", {"-1", "5", "3", "0"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestDoubleMethod() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrap - {R"([ - "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", - "123e3", "123e+3", "1.23e+1", "1.23e1", - "12e0", "12.3e0", "0", "0.0", "0.0e0" - ])", "$.double()", { - "123", "123.4", "0.567", "123.4", "0.567", "12.34", - "123000", "123000", "12.3", "12.3", - "12", "12.3", "0", "0", "0", - }}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestKeyValueMethod() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrap - {R"([{ - "one": 1, - "two": 2, - "three": 3 - }])", "$.keyvalue().name", {"\"one\"", "\"three\"", 
"\"two\""}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestExistsPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "key": 123 - })", "exists ($.another_key)", {"false"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestLikeRegexPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - // Check unwrapping - {R"(["string", "123", "456"])", R"($ like_regex "[0-9]+")", {"true"}}, - - // Check early stopping - {R"([123, "123", "456"])", R"($ like_regex "[0-9]+")", {"null"}}, - {R"(["123", "456", 123])", R"($ like_regex "[0-9]+")", {"true"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestStartsWithPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, - {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"true"}}, - {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, - {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : LAX_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TJsonPathLaxTest);
\ No newline at end of file +#include "test_base.h" + +class TJsonPathLaxTest : public TJsonPathTestBase { +public: + TJsonPathLaxTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathLaxTest); + UNIT_TEST(TestArrayUnwrap); + UNIT_TEST(TestArrayWrap); + UNIT_TEST(TestInvalidArrayIndices); + UNIT_TEST(TestStructuralErrorsHandling); + UNIT_TEST(TestCompareOperations); + UNIT_TEST(TestFilter); + UNIT_TEST(TestNumericMethods); + UNIT_TEST(TestDoubleMethod); + UNIT_TEST(TestKeyValueMethod); + UNIT_TEST(TestExistsPredicate); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST_SUITE_END(); + + void TestArrayUnwrap() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.key", {"1", "2"}}, + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.*", {"1", "2"}}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.key", {"1"}}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.*", {"1"}}, + {R"({"another_key": 123})", "$.key", {}}, + {R"([ + {"key": [{"nested": 28}]}, + {"key": [{"nested": 29}]} + ])", "$.key.nested", {"28", "29"}}, + {R"([ + {"key": [{"nested": 28}]}, + {"key": [{"nested": 29}]} + ])", "$.*.*", {"28", "29"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestArrayWrap() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([1, 2])", "$[*][0]", {"1", "2"}}, + {R"([[1], 2, [3]])", "$[*][0]", {"1", "2", "3"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestInvalidArrayIndices() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx]", {}}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", 
{}}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", {}}, + {R"([1, 2, 3, 4, 5])", "$[3 to 0]", {}}, + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx, 1 to 2]", {"2", "3"}}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[0, $.from to $.to, 2 to 2]", {"1", "3"}}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[0, $.from to $.to, 1 to 1]", {"1", "2"}}, + {R"([1, 2, 3, 4, 5])", "$[0, 3 to 0, 1]", {"1", "2"}}, + {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", {"5"}}, + {"[]", "$[last]", {}}, + {"[]", "$[last to 0]", {}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStructuralErrorsHandling() { + const TVector<TMultiOutputTestCase> testCases = { + {R"([[{"key": 1}]])", "$.key", {}}, + {R"([[{"key": 1}]])", "$.*", {}}, + {R"([ + {"key": 1}, + {"not_key": 2}, + {"key": 3} + ])", "$[*].key", {"1", "3"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestCompareOperations() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"({ + "left": [1, 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"true"}}, + // Check incomparable types + // NOTE: Even though values of types string and number are incomparable, + // pair 1 < 4 is true and was found first, so the overall result is true + {R"({ + "left": [1, 2, "string"], + "right": [4, 5, 6] + })", "$.left < $.right", {"true"}}, + // NOTE: In this example pair "string" < 4 results in error and was found first, + // so overall result is null + {R"({ + "left": ["string", 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + 
RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestFilter() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([ + {"age": 18}, + {"age": 25}, + {"age": 50}, + {"age": 5} + ])", "$ ? (@.age >= 18 && @.age <= 30) . age", {"18", "25"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestNumericMethods() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {"[-1.23, 4.56, 3, 0]", "$.abs()", {"1.23", "4.56", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$.floor()", {"-2", "4", "3", "0"}}, + {"[-1.23, 4.56, 3, 0]", "$.ceiling()", {"-1", "5", "3", "0"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestDoubleMethod() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([ + "123", "123.4", "0.567", "1234e-1", "567e-3", "123.4e-1", + "123e3", "123e+3", "1.23e+1", "1.23e1", + "12e0", "12.3e0", "0", "0.0", "0.0e0" + ])", "$.double()", { + "123", "123.4", "0.567", "123.4", "0.567", "12.34", + "123000", "123000", "12.3", "12.3", + "12", "12.3", "0", "0", "0", + }}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestKeyValueMethod() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrap + {R"([{ + "one": 1, + "two": 2, + "three": 3 + }])", "$.keyvalue().name", {"\"one\"", "\"three\"", "\"two\""}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestExistsPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + 
{R"({ + "key": 123 + })", "exists ($.another_key)", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + // Check unwrapping + {R"(["string", "123", "456"])", R"($ like_regex "[0-9]+")", {"true"}}, + + // Check early stopping + {R"([123, "123", "456"])", R"($ like_regex "[0-9]+")", {"null"}}, + {R"(["123", "456", 123])", R"($ like_regex "[0-9]+")", {"true"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, + {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"true"}}, + {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, + {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : LAX_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathLaxTest);
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/ut/strict_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/strict_ut.cpp index c8414581e4..05826a9a10 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/strict_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/strict_ut.cpp @@ -1,118 +1,118 @@ -#include "test_base.h" - -class TJsonPathStrictTest : public TJsonPathTestBase { -public: - TJsonPathStrictTest() - : TJsonPathTestBase() - { - } - - UNIT_TEST_SUITE(TJsonPathStrictTest); - UNIT_TEST(TestRuntimeErrors); - UNIT_TEST(TestIncomparableTypes); - UNIT_TEST(TestLikeRegexPredicate); - UNIT_TEST(TestStartsWithPredicate); - UNIT_TEST_SUITE_END(); - - void TestRuntimeErrors() { - const TVector<TRuntimeErrorTestCase> testCases = { - {R"([ - {"key": 1}, - {"key": 2} - ])", "$.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, - {R"([ - {"key": 1}, - {"key": 2} - ])", "$.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, - {R"({ - "first": {"key": 1}, - "second": [] - })", "$.*.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, - {R"({ - "first": {"key": 1}, - "second": [] - })", "$.*.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, - {R"({"another_key": 123})", "$.key", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)}, - {R"([1, 2])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, - {R"([[1], 2, [3]])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, - {R"({ - "idx": -1, - "array": [1, 2, 3] - })", "$.array[$.idx]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {R"({ - "from": -1, - "to": 3, - "array": [1, 2, 3] - })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {R"({ - "from": 0, - "to": -1, - "array": [1, 2, 3] - })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {R"({ - "from": -20, - "to": -10, - "array": [1, 2, 3] - })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {R"([1, 2, 3, 4, 5])", "$[3 to 0]", 
C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE)}, - {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {"[]", "$[last]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - {"[]", "$[last to 0]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : STRICT_MODES) { - RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); - } - } - } - - void TestIncomparableTypes() { - const TVector<TMultiOutputTestCase> testCases = { - {R"({ - "left": [1, 2, "string"], - "right": [4, 5, 6] - })", "$.left < $.right", {"null"}}, - {R"({ - "left": ["string", 2, 3], - "right": [4, 5, 6] - })", "$.left < $.right", {"null"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : STRICT_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestLikeRegexPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"(["123", 123])", R"($[*] like_regex "[0-9]+")", {"null"}}, - {R"([123, "123"])", R"($[*] like_regex "[0-9]+")", {"null"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : STRICT_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } - - void TestStartsWithPredicate() { - const TVector<TMultiOutputTestCase> testCases = { - {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, - {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"null"}}, - {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, - {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, - }; - - for (const auto& testCase : testCases) { - for (const auto mode : STRICT_MODES) { - RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); - } - } - } -}; - -UNIT_TEST_SUITE_REGISTRATION(TJsonPathStrictTest);
\ No newline at end of file +#include "test_base.h" + +class TJsonPathStrictTest : public TJsonPathTestBase { +public: + TJsonPathStrictTest() + : TJsonPathTestBase() + { + } + + UNIT_TEST_SUITE(TJsonPathStrictTest); + UNIT_TEST(TestRuntimeErrors); + UNIT_TEST(TestIncomparableTypes); + UNIT_TEST(TestLikeRegexPredicate); + UNIT_TEST(TestStartsWithPredicate); + UNIT_TEST_SUITE_END(); + + void TestRuntimeErrors() { + const TVector<TRuntimeErrorTestCase> testCases = { + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"([ + {"key": 1}, + {"key": 2} + ])", "$.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.key", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({ + "first": {"key": 1}, + "second": [] + })", "$.*.*", C(TIssuesIds::JSONPATH_EXPECTED_OBJECT)}, + {R"({"another_key": 123})", "$.key", C(TIssuesIds::JSONPATH_MEMBER_NOT_FOUND)}, + {R"([1, 2])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, + {R"([[1], 2, [3]])", "$[*][0]", C(TIssuesIds::JSONPATH_EXPECTED_ARRAY)}, + {R"({ + "idx": -1, + "array": [1, 2, 3] + })", "$.array[$.idx]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": -1, + "to": 3, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": 0, + "to": -1, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"({ + "from": -20, + "to": -10, + "array": [1, 2, 3] + })", "$.array[$.from to $.to]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {R"([1, 2, 3, 4, 5])", "$[3 to 0]", C(TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE)}, + {R"([[1, 2], [3, 4, 5], []])", "$[*][2]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {"[]", "$[last]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + {"[]", "$[last to 0]", C(TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS)}, + }; + + for (const auto& testCase 
: testCases) { + for (const auto mode : STRICT_MODES) { + RunRuntimeErrorTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Error); + } + } + } + + void TestIncomparableTypes() { + const TVector<TMultiOutputTestCase> testCases = { + {R"({ + "left": [1, 2, "string"], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + {R"({ + "left": ["string", 2, 3], + "right": [4, 5, 6] + })", "$.left < $.right", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestLikeRegexPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["123", 123])", R"($[*] like_regex "[0-9]+")", {"null"}}, + {R"([123, "123"])", R"($[*] like_regex "[0-9]+")", {"null"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestStartsWithPredicate() { + const TVector<TMultiOutputTestCase> testCases = { + {R"(["a", "b", "c"])", R"("abcd" starts with $[*])", {"true"}}, + {R"(["a", 1.45, 50])", R"("abcd" starts with $[*])", {"null"}}, + {R"([1.45, 50, "a"])", R"("abcd" starts with $[*])", {"null"}}, + {R"(["b", "c"])", R"("abcd" starts with $[*])", {"false"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : STRICT_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TJsonPathStrictTest);
\ No newline at end of file diff --git a/ydb/library/yql/minikql/jsonpath/ut/test_base.cpp b/ydb/library/yql/minikql/jsonpath/ut/test_base.cpp index abc3110c9c..66903c6559 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/test_base.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/test_base.cpp @@ -1,166 +1,166 @@ -#include "test_base.h" - +#include "test_base.h" + #include <ydb/library/binary_json/write.h> - -using namespace NKikimr::NBinaryJson; - -TJsonPathTestBase::TJsonPathTestBase() - : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())) - , Env(Alloc) - , MemInfo("Memory") - , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()) - , ValueBuilder(HolderFactory) -{ -} - -TIssueCode TJsonPathTestBase::C(TIssuesIds::EIssueCode code) { - return static_cast<TIssueCode>(code); -} - -TUnboxedValue TJsonPathTestBase::ParseJson(TStringBuf raw) { - return TryParseJsonDom(raw, &ValueBuilder); -} - -void TJsonPathTestBase::RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult) { - try { - const auto unboxedValueJson = TValue(ParseJson(rawJson)); - - const auto binaryJson = *SerializeToBinaryJson(rawJson);; - auto reader = TBinaryJsonReader::Make(binaryJson); - auto binaryJsonRoot = TValue(reader->GetRootCursor()); - - TIssues issues; - const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); - UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); - - for (const auto json : {unboxedValueJson, binaryJsonRoot}) { - const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); - UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); - - const auto& nodes = result.GetNodes(); - UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); - for (size_t i = 0; i < nodes.size(); i++) { - const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder); - UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); - } - } - } catch (...) 
{ - TStringBuilder message; - message << "Exception: " << CurrentExceptionMessage() << Endl - << "Input JSON: " << rawJson << Endl - << "Jsonpath: " << rawJsonPath << Endl - << "Expected output:"; - for (const auto& item : expectedResult) { - message << " " << item; - } - message << Endl; - - UNIT_FAIL(message); - } -} - -void TJsonPathTestBase::RunParseErrorTestCase(const TString& rawJsonPath) { - try { - TIssues issues; - const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, 2); - UNIT_ASSERT_C(!issues.Empty(), "Expected parse errors"); - } catch (...) { - UNIT_FAIL( - "Exception: " << CurrentExceptionMessage() << Endl - << "Jsonpath: " << rawJsonPath << Endl - ); - } -} - -void TJsonPathTestBase::RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error) { - try { - const auto unboxedValueJson = TValue(ParseJson(rawJson)); - - const auto binaryJson = *SerializeToBinaryJson(rawJson); - auto reader = TBinaryJsonReader::Make(binaryJson); - auto binaryJsonRoot = TValue(reader->GetRootCursor()); - - TIssues issues; - const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); - UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); - - for (const auto json : {unboxedValueJson, binaryJsonRoot}) { - const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); - UNIT_ASSERT_C(result.IsError(), "Expected runtime error"); - UNIT_ASSERT_VALUES_EQUAL(result.GetError().GetCode(), error); - } - } catch (...) 
{ - UNIT_FAIL( - TStringBuilder() - << "Exception: " << CurrentExceptionMessage() << Endl - << "Input JSON: " << rawJson << Endl - << "Jsonpath: " << rawJsonPath << Endl - << "Expected error: " << error << Endl - ); - } -} - -void TJsonPathTestBase::RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult) { - try { - const auto unboxedValueJson = TValue(ParseJson(rawJson)); - - const auto binaryJson = *SerializeToBinaryJson(rawJson); - auto reader = TBinaryJsonReader::Make(binaryJson); - auto binaryJsonRoot = TValue(reader->GetRootCursor()); - - TVariablesMap unboxedValueVariables; - for (const auto& it : variables) { - unboxedValueVariables[it.first] = TValue(ParseJson(it.second)); - } - - TVariablesMap binaryJsonVariables; - TVector<TBinaryJson> storage; - TVector<TBinaryJsonReaderPtr> readers; - storage.reserve(variables.size()); - readers.reserve(variables.size()); - for (const auto& it : variables) { - storage.push_back(*SerializeToBinaryJson(it.second)); - readers.push_back(TBinaryJsonReader::Make(storage.back())); - binaryJsonVariables[it.first] = TValue(readers.back()->GetRootCursor()); - } - - TIssues issues; - const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); - UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); - - TVector<std::pair<TValue, TVariablesMap>> testCases = { - {unboxedValueJson, unboxedValueVariables}, - {binaryJsonRoot, binaryJsonVariables}, - }; - for (const auto testCase : testCases) { - const auto result = ExecuteJsonPath(jsonPath, testCase.first, testCase.second, &ValueBuilder); - UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); - - const auto& nodes = result.GetNodes(); - UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); - for (size_t i = 0; i < nodes.size(); i++) { - const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder); - 
UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); - } - } - } catch (...) { - TStringBuilder message; - message << "Exception: " << CurrentExceptionMessage() << Endl - << "Input JSON: " << rawJson << Endl - << "Variables:" << Endl; - for (const auto& it : variables) { - message << "\t" << it.first << " = " << it.second; - } - - message << Endl - << "Jsonpath: " << rawJsonPath << Endl - << "Expected output:"; - for (const auto& item : expectedResult) { - message << " " << item; - } - message << Endl; - - UNIT_FAIL(message); - } + +using namespace NKikimr::NBinaryJson; + +TJsonPathTestBase::TJsonPathTestBase() + : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry())) + , Env(Alloc) + , MemInfo("Memory") + , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get()) + , ValueBuilder(HolderFactory) +{ +} + +TIssueCode TJsonPathTestBase::C(TIssuesIds::EIssueCode code) { + return static_cast<TIssueCode>(code); +} + +TUnboxedValue TJsonPathTestBase::ParseJson(TStringBuf raw) { + return TryParseJsonDom(raw, &ValueBuilder); +} + +void TJsonPathTestBase::RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson);; + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + for (const auto json : {unboxedValueJson, binaryJsonRoot}) { + const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); + UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); + + const auto& nodes = result.GetNodes(); + UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); + for (size_t i = 0; i < nodes.size(); i++) { + const auto converted = 
nodes[i].ConvertToUnboxedValue(&ValueBuilder); + UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); + } + } + } catch (...) { + TStringBuilder message; + message << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected output:"; + for (const auto& item : expectedResult) { + message << " " << item; + } + message << Endl; + + UNIT_FAIL(message); + } +} + +void TJsonPathTestBase::RunParseErrorTestCase(const TString& rawJsonPath) { + try { + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, 2); + UNIT_ASSERT_C(!issues.Empty(), "Expected parse errors"); + } catch (...) { + UNIT_FAIL( + "Exception: " << CurrentExceptionMessage() << Endl + << "Jsonpath: " << rawJsonPath << Endl + ); + } +} + +void TJsonPathTestBase::RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson); + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + for (const auto json : {unboxedValueJson, binaryJsonRoot}) { + const auto result = ExecuteJsonPath(jsonPath, json, TVariablesMap{}, &ValueBuilder); + UNIT_ASSERT_C(result.IsError(), "Expected runtime error"); + UNIT_ASSERT_VALUES_EQUAL(result.GetError().GetCode(), error); + } + } catch (...) 
{ + UNIT_FAIL( + TStringBuilder() + << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected error: " << error << Endl + ); + } +} + +void TJsonPathTestBase::RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult) { + try { + const auto unboxedValueJson = TValue(ParseJson(rawJson)); + + const auto binaryJson = *SerializeToBinaryJson(rawJson); + auto reader = TBinaryJsonReader::Make(binaryJson); + auto binaryJsonRoot = TValue(reader->GetRootCursor()); + + TVariablesMap unboxedValueVariables; + for (const auto& it : variables) { + unboxedValueVariables[it.first] = TValue(ParseJson(it.second)); + } + + TVariablesMap binaryJsonVariables; + TVector<TBinaryJson> storage; + TVector<TBinaryJsonReaderPtr> readers; + storage.reserve(variables.size()); + readers.reserve(variables.size()); + for (const auto& it : variables) { + storage.push_back(*SerializeToBinaryJson(it.second)); + readers.push_back(TBinaryJsonReader::Make(storage.back())); + binaryJsonVariables[it.first] = TValue(readers.back()->GetRootCursor()); + } + + TIssues issues; + const TJsonPathPtr jsonPath = ParseJsonPath(rawJsonPath, issues, MAX_PARSE_ERRORS); + UNIT_ASSERT_C(issues.Empty(), "Parse errors found"); + + TVector<std::pair<TValue, TVariablesMap>> testCases = { + {unboxedValueJson, unboxedValueVariables}, + {binaryJsonRoot, binaryJsonVariables}, + }; + for (const auto testCase : testCases) { + const auto result = ExecuteJsonPath(jsonPath, testCase.first, testCase.second, &ValueBuilder); + UNIT_ASSERT_C(!result.IsError(), "Runtime errors found"); + + const auto& nodes = result.GetNodes(); + UNIT_ASSERT_VALUES_EQUAL(nodes.size(), expectedResult.size()); + for (size_t i = 0; i < nodes.size(); i++) { + const auto converted = nodes[i].ConvertToUnboxedValue(&ValueBuilder); + 
UNIT_ASSERT_VALUES_EQUAL(SerializeJsonDom(converted), expectedResult[i]); + } + } + } catch (...) { + TStringBuilder message; + message << "Exception: " << CurrentExceptionMessage() << Endl + << "Input JSON: " << rawJson << Endl + << "Variables:" << Endl; + for (const auto& it : variables) { + message << "\t" << it.first << " = " << it.second; + } + + message << Endl + << "Jsonpath: " << rawJsonPath << Endl + << "Expected output:"; + for (const auto& item : expectedResult) { + message << " " << item; + } + message << Endl; + + UNIT_FAIL(message); + } } diff --git a/ydb/library/yql/minikql/jsonpath/ut/test_base.h b/ydb/library/yql/minikql/jsonpath/ut/test_base.h index 7c4bd803f6..bebd4aa830 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/test_base.h +++ b/ydb/library/yql/minikql/jsonpath/ut/test_base.h @@ -1,9 +1,9 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/core/issue/protos/issue_id.pb.h> #include <ydb/library/yql/minikql/jsonpath/jsonpath.h> #include <ydb/library/yql/minikql/dom/json.h> - + #include <ydb/library/yql/minikql/computation/mkql_value_builder.h> #include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> #include <ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> @@ -11,65 +11,65 @@ #include <ydb/library/yql/minikql/mkql_function_registry.h> #include <ydb/library/yql/minikql/mkql_alloc.h> #include <ydb/library/yql/minikql/mkql_node.h> - + #include <library/cpp/json/json_reader.h> #include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/yexception.h> - -using namespace NYql; -using namespace NYql::NDom; -using namespace NYql::NUdf; -using namespace NYql::NJsonPath; -using namespace NJson; -using namespace NKikimr::NMiniKQL; - -class TJsonPathTestBase: public TTestBase { -public: - TJsonPathTestBase(); - -protected: - const TVector<TStringBuf> LAX_MODES = {"", "lax "}; - const TVector<TStringBuf> STRICT_MODES = {"strict "}; - const TVector<TStringBuf> ALL_MODES = {"", "lax ", "strict "}; - 
- TIntrusivePtr<IFunctionRegistry> FunctionRegistry; - TScopedAlloc Alloc; - TTypeEnvironment Env; - TMemoryUsageInfo MemInfo; - THolderFactory HolderFactory; - TDefaultValueBuilder ValueBuilder; - - const int MAX_PARSE_ERRORS = 100; - - TIssueCode C(TIssuesIds::EIssueCode code); - - TUnboxedValue ParseJson(TStringBuf raw); - - struct TMultiOutputTestCase { - TString Json; - TString JsonPath; - TVector<TString> Result; - }; - - void RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult); - - void RunParseErrorTestCase(const TString& rawJsonPath); - - struct TRuntimeErrorTestCase { - TString Json; - TString JsonPath; - TIssueCode Error; - }; - - void RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error); - - struct TVariablesTestCase { - TString Json; - THashMap<TStringBuf, TStringBuf> Variables; - TString JsonPath; - TVector<TString> Result; - }; - - void RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult); -}; + +#include <util/generic/yexception.h> + +using namespace NYql; +using namespace NYql::NDom; +using namespace NYql::NUdf; +using namespace NYql::NJsonPath; +using namespace NJson; +using namespace NKikimr::NMiniKQL; + +class TJsonPathTestBase: public TTestBase { +public: + TJsonPathTestBase(); + +protected: + const TVector<TStringBuf> LAX_MODES = {"", "lax "}; + const TVector<TStringBuf> STRICT_MODES = {"strict "}; + const TVector<TStringBuf> ALL_MODES = {"", "lax ", "strict "}; + + TIntrusivePtr<IFunctionRegistry> FunctionRegistry; + TScopedAlloc Alloc; + TTypeEnvironment Env; + TMemoryUsageInfo MemInfo; + THolderFactory HolderFactory; + TDefaultValueBuilder ValueBuilder; + + const int MAX_PARSE_ERRORS = 100; + + TIssueCode C(TIssuesIds::EIssueCode code); + + TUnboxedValue ParseJson(TStringBuf raw); + + struct TMultiOutputTestCase { + TString Json; + TString 
JsonPath; + TVector<TString> Result; + }; + + void RunTestCase(const TString& rawJson, const TString& rawJsonPath, const TVector<TString>& expectedResult); + + void RunParseErrorTestCase(const TString& rawJsonPath); + + struct TRuntimeErrorTestCase { + TString Json; + TString JsonPath; + TIssueCode Error; + }; + + void RunRuntimeErrorTestCase(const TString& rawJson, const TString& rawJsonPath, TIssueCode error); + + struct TVariablesTestCase { + TString Json; + THashMap<TStringBuf, TStringBuf> Variables; + TString JsonPath; + TVector<TString> Result; + }; + + void RunVariablesTestCase(const TString& rawJson, const THashMap<TStringBuf, TStringBuf>& variables, const TString& rawJsonPath, const TVector<TString>& expectedResult); +}; diff --git a/ydb/library/yql/minikql/jsonpath/ut/ya.make b/ydb/library/yql/minikql/jsonpath/ut/ya.make index ef51d59ae3..970a1829e5 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/ya.make +++ b/ydb/library/yql/minikql/jsonpath/ut/ya.make @@ -1,16 +1,16 @@ -UNITTEST_FOR(yql/library/jsonpath) - -OWNER(g:yql) - -SRCS( - common_ut.cpp - examples_ut.cpp - lax_ut.cpp - strict_ut.cpp - test_base.cpp -) - -PEERDIR( +UNITTEST_FOR(yql/library/jsonpath) + +OWNER(g:yql) + +SRCS( + common_ut.cpp + examples_ut.cpp + lax_ut.cpp + strict_ut.cpp + test_base.cpp +) + +PEERDIR( library/cpp/json ydb/library/binary_json ydb/library/yql/minikql @@ -19,8 +19,8 @@ PEERDIR( ydb/library/yql/minikql/invoke_builtins ydb/library/yql/public/udf/service/exception_policy ydb/library/yql/core/issue/protos -) - +) + YQL_LAST_ABI_VERSION() -END() +END() diff --git a/ydb/library/yql/minikql/jsonpath/value.cpp b/ydb/library/yql/minikql/jsonpath/value.cpp index 9424fc4ee6..0f990a7249 100644 --- a/ydb/library/yql/minikql/jsonpath/value.cpp +++ b/ydb/library/yql/minikql/jsonpath/value.cpp @@ -1,383 +1,383 @@ -#include "value.h" - +#include "value.h" + #include <ydb/library/yql/minikql/dom/node.h> - -namespace NYql::NJsonPath { - -using namespace NUdf; -using namespace NDom; 
-using namespace NKikimr; -using namespace NKikimr::NBinaryJson; - -TArrayIterator::TArrayIterator() - : Iterator(TEmptyMarker()) -{ -} - -TArrayIterator::TArrayIterator(const TUnboxedValue& iterator) - : Iterator(iterator) -{ -} - -TArrayIterator::TArrayIterator(TUnboxedValue&& iterator) - : Iterator(std::move(iterator)) -{ -} - -TArrayIterator::TArrayIterator(const NBinaryJson::TArrayIterator& iterator) - : Iterator(iterator) -{ -} - -TArrayIterator::TArrayIterator(NBinaryJson::TArrayIterator&& iterator) - : Iterator(std::move(iterator)) -{ -} - -bool TArrayIterator::Next(TValue& value) { - if (std::holds_alternative<TEmptyMarker>(Iterator)) { - return false; - } else if (auto* iterator = std::get_if<NBinaryJson::TArrayIterator>(&Iterator)) { - if (!iterator->HasNext()) { - return false; - } - value = TValue(iterator->Next()); - return true; - } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) { - TUnboxedValue result; - const bool success = iterator->Next(result); - if (success) { - value = TValue(result); - } - return success; - } else { - Y_FAIL("Unexpected variant case in Next"); - } -} - -TObjectIterator::TObjectIterator() - : Iterator(TEmptyMarker()) -{ -} - -TObjectIterator::TObjectIterator(const TUnboxedValue& iterator) - : Iterator(iterator) -{ -} - -TObjectIterator::TObjectIterator(TUnboxedValue&& iterator) - : Iterator(std::move(iterator)) -{ -} - -TObjectIterator::TObjectIterator(const NBinaryJson::TObjectIterator& iterator) - : Iterator(iterator) -{ -} - -TObjectIterator::TObjectIterator(NBinaryJson::TObjectIterator&& iterator) - : Iterator(std::move(iterator)) -{ -} - -bool TObjectIterator::Next(TValue& key, TValue& value) { - if (std::holds_alternative<TEmptyMarker>(Iterator)) { - return false; - } else if (auto* iterator = std::get_if<NBinaryJson::TObjectIterator>(&Iterator)) { - if (!iterator->HasNext()) { - return false; - } - const auto [itKey, itValue] = iterator->Next(); - key = TValue(itKey); - value = TValue(itValue); - 
return true; - } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) { - TUnboxedValue itKey; - TUnboxedValue itValue; - const bool success = iterator->NextPair(itKey, itValue); - if (success) { - key = TValue(itKey); - value = TValue(itValue); - } - return success; - } else { - Y_FAIL("Unexpected variant case in Next"); - } -} - -TValue::TValue() - : Value(MakeEntity()) -{ -} - -TValue::TValue(const TUnboxedValue& value) - : Value(value) -{ -} - -TValue::TValue(TUnboxedValue&& value) - : Value(std::move(value)) -{ -} - -TValue::TValue(const TEntryCursor& value) - : Value(value) -{ - UnpackInnerValue(); -} - -TValue::TValue(TEntryCursor&& value) - : Value(std::move(value)) -{ - UnpackInnerValue(); -} - -TValue::TValue(const TContainerCursor& value) - : Value(value) -{ - UnpackInnerValue(); -} - -TValue::TValue(TContainerCursor&& value) - : Value(std::move(value)) -{ - UnpackInnerValue(); -} - -EValueType TValue::GetType() const { - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - switch (value->GetType()) { - case EEntryType::BoolFalse: - case EEntryType::BoolTrue: - return EValueType::Bool; - case EEntryType::Null: - return EValueType::Null; - case EEntryType::Number: - return EValueType::Number; - case EEntryType::String: - return EValueType::String; - case EEntryType::Container: - Y_FAIL("Logical error: TEntryCursor with Container type must be converted to TContainerCursor"); - } - } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - switch (value->GetType()) { - case EContainerType::Array: - return EValueType::Array; - case EContainerType::Object: - return EValueType::Object; - case EContainerType::TopLevelScalar: - Y_FAIL("Logical error: TContainerCursor with TopLevelScalar type must be converted to TEntryCursor"); - } - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - switch (GetNodeType(*value)) { - case ENodeType::Bool: - return EValueType::Bool; - case ENodeType::Double: - case 
ENodeType::Int64: - case ENodeType::Uint64: - return EValueType::Number; - case ENodeType::Dict: - case ENodeType::Attr: - return EValueType::Object; - case ENodeType::List: - return EValueType::Array; - case ENodeType::String: - return EValueType::String; - case ENodeType::Entity: - return EValueType::Null; - } - } else { - Y_FAIL("Unexpected variant case in GetType"); - } -} - -bool TValue::Is(EValueType type) const { - return GetType() == type; -} - -bool TValue::IsBool() const { - return Is(EValueType::Bool); -} - -bool TValue::IsNumber() const { - return Is(EValueType::Number); -} - -bool TValue::IsString() const { - return Is(EValueType::String); -} - -bool TValue::IsNull() const { - return Is(EValueType::Null); -} - -bool TValue::IsObject() const { - return Is(EValueType::Object); -} - -bool TValue::IsArray() const { - return Is(EValueType::Array); -} - -double TValue::GetNumber() const { - Y_VERIFY_DEBUG(IsNumber()); - - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - return value->GetNumber(); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - if (IsNodeType(*value, ENodeType::Double)) { - return value->Get<double>(); - } else if (IsNodeType(*value, ENodeType::Int64)) { - return static_cast<double>(value->Get<i64>()); - } else { - return static_cast<double>(value->Get<ui64>()); - } - } else { - Y_FAIL("Unexpected variant case in GetNumber"); - } -} - -bool TValue::GetBool() const { - Y_VERIFY_DEBUG(IsBool()); - - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - return value->GetType() == EEntryType::BoolTrue; - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - return value->Get<bool>(); - } else { - Y_FAIL("Unexpected variant case in GetBool"); - } -} - -const TStringBuf TValue::GetString() const { - Y_VERIFY_DEBUG(IsString()); - - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - return value->GetString(); - } else if (const auto* value = 
std::get_if<TUnboxedValue>(&Value)) { - return value->AsStringRef(); - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -ui32 TValue::GetSize() const { - Y_VERIFY_DEBUG(IsArray() || IsObject()); - - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - return value->GetSize(); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - if (value->IsEmbedded()) { - return 0; - } - - if (IsNodeType(*value, ENodeType::List)) { - return value->GetListLength(); - } else { - return value->GetDictLength(); - } - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -TValue TValue::GetElement(ui32 index) const { - Y_VERIFY_DEBUG(IsArray()); - - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - return TValue(value->GetElement(index)); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - return TValue(value->GetElement(index)); - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -TArrayIterator TValue::GetArrayIterator() const { - Y_VERIFY_DEBUG(IsArray()); - - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - return TArrayIterator(value->GetArrayIterator()); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - if (value->IsEmbedded()) { - return TArrayIterator(); - } - return TArrayIterator(value->GetListIterator()); - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -TMaybe<TValue> TValue::Lookup(const TStringBuf key) const { - Y_VERIFY_DEBUG(IsObject()); - - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - const auto payload = value->Lookup(key); - if (!payload.Defined()) { - return Nothing(); - } - return TValue(*payload); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - if (value->IsEmbedded()) { - return Nothing(); - } - - // Lookup on TUnboxedValue can be performed only with TUnboxedValue key. 
- // To avoid allocating new string we use our custom Lookup method defined - // on underlying TMapNode that accepts TStringRef - const auto* dict = static_cast<const TMapNode*>(value->AsBoxed().Get()); - if (const auto payload = dict->Lookup(key)) { - return {TValue(payload)}; - } else { - return Nothing(); - } - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -TObjectIterator TValue::GetObjectIterator() const { - Y_VERIFY_DEBUG(IsObject()); - - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - return TObjectIterator(value->GetObjectIterator()); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - if (value->IsEmbedded()) { - return TObjectIterator(); - } - return TObjectIterator(value->GetDictIterator()); - } else { - Y_FAIL("Unexpected variant case in GetString"); - } -} - -TUnboxedValue TValue::ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const { - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - return ReadElementToJsonDom(*value, valueBuilder); - } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - return ReadContainerToJsonDom(*value, valueBuilder); - } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { - return *value; - } else { - Y_FAIL("Unexpected variant case in ConvertToUnboxedValue"); - } -} - -void TValue::UnpackInnerValue() { - // If TEntryCursor points to container, we need to extract TContainerCursor - if (const auto* value = std::get_if<TEntryCursor>(&Value)) { - if (value->GetType() == EEntryType::Container) { - Value = value->GetContainer(); - } - } - - // If TContainerCursor points to top level scalar, we need to extract TEntryCursor - if (const auto* value = std::get_if<TContainerCursor>(&Value)) { - if (value->GetType() == EContainerType::TopLevelScalar) { - Value = value->GetElement(0); - } - } -} - + +namespace NYql::NJsonPath { + +using namespace NUdf; +using namespace NDom; +using namespace NKikimr; +using 
namespace NKikimr::NBinaryJson; + +TArrayIterator::TArrayIterator() + : Iterator(TEmptyMarker()) +{ +} + +TArrayIterator::TArrayIterator(const TUnboxedValue& iterator) + : Iterator(iterator) +{ +} + +TArrayIterator::TArrayIterator(TUnboxedValue&& iterator) + : Iterator(std::move(iterator)) +{ +} + +TArrayIterator::TArrayIterator(const NBinaryJson::TArrayIterator& iterator) + : Iterator(iterator) +{ +} + +TArrayIterator::TArrayIterator(NBinaryJson::TArrayIterator&& iterator) + : Iterator(std::move(iterator)) +{ +} + +bool TArrayIterator::Next(TValue& value) { + if (std::holds_alternative<TEmptyMarker>(Iterator)) { + return false; + } else if (auto* iterator = std::get_if<NBinaryJson::TArrayIterator>(&Iterator)) { + if (!iterator->HasNext()) { + return false; + } + value = TValue(iterator->Next()); + return true; + } else if (auto* iterator = std::get_if<TUnboxedValue>(&Iterator)) { + TUnboxedValue result; + const bool success = iterator->Next(result); + if (success) { + value = TValue(result); + } + return success; + } else { + Y_FAIL("Unexpected variant case in Next"); + } +} + +TObjectIterator::TObjectIterator() + : Iterator(TEmptyMarker()) +{ +} + +TObjectIterator::TObjectIterator(const TUnboxedValue& iterator) + : Iterator(iterator) +{ +} + +TObjectIterator::TObjectIterator(TUnboxedValue&& iterator) + : Iterator(std::move(iterator)) +{ +} + +TObjectIterator::TObjectIterator(const NBinaryJson::TObjectIterator& iterator) + : Iterator(iterator) +{ +} + +TObjectIterator::TObjectIterator(NBinaryJson::TObjectIterator&& iterator) + : Iterator(std::move(iterator)) +{ +} + +bool TObjectIterator::Next(TValue& key, TValue& value) { + if (std::holds_alternative<TEmptyMarker>(Iterator)) { + return false; + } else if (auto* iterator = std::get_if<NBinaryJson::TObjectIterator>(&Iterator)) { + if (!iterator->HasNext()) { + return false; + } + const auto [itKey, itValue] = iterator->Next(); + key = TValue(itKey); + value = TValue(itValue); + return true; + } else if (auto* 
iterator = std::get_if<TUnboxedValue>(&Iterator)) { + TUnboxedValue itKey; + TUnboxedValue itValue; + const bool success = iterator->NextPair(itKey, itValue); + if (success) { + key = TValue(itKey); + value = TValue(itValue); + } + return success; + } else { + Y_FAIL("Unexpected variant case in Next"); + } +} + +TValue::TValue() + : Value(MakeEntity()) +{ +} + +TValue::TValue(const TUnboxedValue& value) + : Value(value) +{ +} + +TValue::TValue(TUnboxedValue&& value) + : Value(std::move(value)) +{ +} + +TValue::TValue(const TEntryCursor& value) + : Value(value) +{ + UnpackInnerValue(); +} + +TValue::TValue(TEntryCursor&& value) + : Value(std::move(value)) +{ + UnpackInnerValue(); +} + +TValue::TValue(const TContainerCursor& value) + : Value(value) +{ + UnpackInnerValue(); +} + +TValue::TValue(TContainerCursor&& value) + : Value(std::move(value)) +{ + UnpackInnerValue(); +} + +EValueType TValue::GetType() const { + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + switch (value->GetType()) { + case EEntryType::BoolFalse: + case EEntryType::BoolTrue: + return EValueType::Bool; + case EEntryType::Null: + return EValueType::Null; + case EEntryType::Number: + return EValueType::Number; + case EEntryType::String: + return EValueType::String; + case EEntryType::Container: + Y_FAIL("Logical error: TEntryCursor with Container type must be converted to TContainerCursor"); + } + } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + switch (value->GetType()) { + case EContainerType::Array: + return EValueType::Array; + case EContainerType::Object: + return EValueType::Object; + case EContainerType::TopLevelScalar: + Y_FAIL("Logical error: TContainerCursor with TopLevelScalar type must be converted to TEntryCursor"); + } + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + switch (GetNodeType(*value)) { + case ENodeType::Bool: + return EValueType::Bool; + case ENodeType::Double: + case ENodeType::Int64: + case 
ENodeType::Uint64: + return EValueType::Number; + case ENodeType::Dict: + case ENodeType::Attr: + return EValueType::Object; + case ENodeType::List: + return EValueType::Array; + case ENodeType::String: + return EValueType::String; + case ENodeType::Entity: + return EValueType::Null; + } + } else { + Y_FAIL("Unexpected variant case in GetType"); + } +} + +bool TValue::Is(EValueType type) const { + return GetType() == type; +} + +bool TValue::IsBool() const { + return Is(EValueType::Bool); +} + +bool TValue::IsNumber() const { + return Is(EValueType::Number); +} + +bool TValue::IsString() const { + return Is(EValueType::String); +} + +bool TValue::IsNull() const { + return Is(EValueType::Null); +} + +bool TValue::IsObject() const { + return Is(EValueType::Object); +} + +bool TValue::IsArray() const { + return Is(EValueType::Array); +} + +double TValue::GetNumber() const { + Y_VERIFY_DEBUG(IsNumber()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetNumber(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (IsNodeType(*value, ENodeType::Double)) { + return value->Get<double>(); + } else if (IsNodeType(*value, ENodeType::Int64)) { + return static_cast<double>(value->Get<i64>()); + } else { + return static_cast<double>(value->Get<ui64>()); + } + } else { + Y_FAIL("Unexpected variant case in GetNumber"); + } +} + +bool TValue::GetBool() const { + Y_VERIFY_DEBUG(IsBool()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetType() == EEntryType::BoolTrue; + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return value->Get<bool>(); + } else { + Y_FAIL("Unexpected variant case in GetBool"); + } +} + +const TStringBuf TValue::GetString() const { + Y_VERIFY_DEBUG(IsString()); + + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return value->GetString(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return 
value->AsStringRef(); + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +ui32 TValue::GetSize() const { + Y_VERIFY_DEBUG(IsArray() || IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return value->GetSize(); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return 0; + } + + if (IsNodeType(*value, ENodeType::List)) { + return value->GetListLength(); + } else { + return value->GetDictLength(); + } + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +TValue TValue::GetElement(ui32 index) const { + Y_VERIFY_DEBUG(IsArray()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TValue(value->GetElement(index)); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return TValue(value->GetElement(index)); + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +TArrayIterator TValue::GetArrayIterator() const { + Y_VERIFY_DEBUG(IsArray()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TArrayIterator(value->GetArrayIterator()); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return TArrayIterator(); + } + return TArrayIterator(value->GetListIterator()); + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +TMaybe<TValue> TValue::Lookup(const TStringBuf key) const { + Y_VERIFY_DEBUG(IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + const auto payload = value->Lookup(key); + if (!payload.Defined()) { + return Nothing(); + } + return TValue(*payload); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return Nothing(); + } + + // Lookup on TUnboxedValue can be performed only with TUnboxedValue key. 
+ // To avoid allocating new string we use our custom Lookup method defined + // on underlying TMapNode that accepts TStringRef + const auto* dict = static_cast<const TMapNode*>(value->AsBoxed().Get()); + if (const auto payload = dict->Lookup(key)) { + return {TValue(payload)}; + } else { + return Nothing(); + } + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +TObjectIterator TValue::GetObjectIterator() const { + Y_VERIFY_DEBUG(IsObject()); + + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return TObjectIterator(value->GetObjectIterator()); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + if (value->IsEmbedded()) { + return TObjectIterator(); + } + return TObjectIterator(value->GetDictIterator()); + } else { + Y_FAIL("Unexpected variant case in GetString"); + } +} + +TUnboxedValue TValue::ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const { + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + return ReadElementToJsonDom(*value, valueBuilder); + } else if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + return ReadContainerToJsonDom(*value, valueBuilder); + } else if (const auto* value = std::get_if<TUnboxedValue>(&Value)) { + return *value; + } else { + Y_FAIL("Unexpected variant case in ConvertToUnboxedValue"); + } +} + +void TValue::UnpackInnerValue() { + // If TEntryCursor points to container, we need to extract TContainerCursor + if (const auto* value = std::get_if<TEntryCursor>(&Value)) { + if (value->GetType() == EEntryType::Container) { + Value = value->GetContainer(); + } + } + + // If TContainerCursor points to top level scalar, we need to extract TEntryCursor + if (const auto* value = std::get_if<TContainerCursor>(&Value)) { + if (value->GetType() == EContainerType::TopLevelScalar) { + Value = value->GetElement(0); + } + } +} + } diff --git a/ydb/library/yql/minikql/jsonpath/value.h b/ydb/library/yql/minikql/jsonpath/value.h index 
589991c74d..f8bd9c0d35 100644 --- a/ydb/library/yql/minikql/jsonpath/value.h +++ b/ydb/library/yql/minikql/jsonpath/value.h @@ -1,101 +1,101 @@ -#pragma once - +#pragma once + #include <ydb/library/binary_json/read.h> - + #include <ydb/library/yql/public/udf/udf_value.h> - -#include <util/generic/maybe.h> - -#include <variant> - -namespace NYql::NJsonPath { - -enum class EValueType { - Bool = 0, - Number = 1, - String = 2, - Null = 4, - Object = 5, - Array = 6, -}; - -struct TEmptyMarker { -}; - -class TValue; - -class TArrayIterator { -public: - TArrayIterator(); - explicit TArrayIterator(const NUdf::TUnboxedValue& iterator); - explicit TArrayIterator(NUdf::TUnboxedValue&& iterator); - - explicit TArrayIterator(const NKikimr::NBinaryJson::TArrayIterator& iterator); - explicit TArrayIterator(NKikimr::NBinaryJson::TArrayIterator&& iterator); - - bool Next(TValue& value); - -private: - std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TArrayIterator> Iterator; -}; - -class TObjectIterator { -public: - TObjectIterator(); - explicit TObjectIterator(const NUdf::TUnboxedValue& iterator); - explicit TObjectIterator(NUdf::TUnboxedValue&& iterator); - - explicit TObjectIterator(const NKikimr::NBinaryJson::TObjectIterator& iterator); - explicit TObjectIterator(NKikimr::NBinaryJson::TObjectIterator&& iterator); - - bool Next(TValue& key, TValue& value); - -private: - std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TObjectIterator> Iterator; -}; - -class TValue { -public: - TValue(); - explicit TValue(const NUdf::TUnboxedValue& value); - explicit TValue(NUdf::TUnboxedValue&& value); - - explicit TValue(const NKikimr::NBinaryJson::TEntryCursor& value); - explicit TValue(NKikimr::NBinaryJson::TEntryCursor&& value); - - explicit TValue(const NKikimr::NBinaryJson::TContainerCursor& value); - explicit TValue(NKikimr::NBinaryJson::TContainerCursor&& value); - - EValueType GetType() const; - bool Is(EValueType type) const; - bool IsBool() 
const; - bool IsNumber() const; - bool IsString() const; - bool IsNull() const; - bool IsObject() const; - bool IsArray() const; - - // Scalar value methods - double GetNumber() const; - bool GetBool() const; - const TStringBuf GetString() const; - - ui32 GetSize() const; - - // Array methods - TValue GetElement(ui32 index) const; - TArrayIterator GetArrayIterator() const; - - // Object methods - TMaybe<TValue> Lookup(const TStringBuf key) const; - TObjectIterator GetObjectIterator() const; - - NUdf::TUnboxedValue ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const; - -private: - void UnpackInnerValue(); - - std::variant<NUdf::TUnboxedValue, NKikimr::NBinaryJson::TEntryCursor, NKikimr::NBinaryJson::TContainerCursor> Value; -}; - -} + +#include <util/generic/maybe.h> + +#include <variant> + +namespace NYql::NJsonPath { + +enum class EValueType { + Bool = 0, + Number = 1, + String = 2, + Null = 4, + Object = 5, + Array = 6, +}; + +struct TEmptyMarker { +}; + +class TValue; + +class TArrayIterator { +public: + TArrayIterator(); + explicit TArrayIterator(const NUdf::TUnboxedValue& iterator); + explicit TArrayIterator(NUdf::TUnboxedValue&& iterator); + + explicit TArrayIterator(const NKikimr::NBinaryJson::TArrayIterator& iterator); + explicit TArrayIterator(NKikimr::NBinaryJson::TArrayIterator&& iterator); + + bool Next(TValue& value); + +private: + std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TArrayIterator> Iterator; +}; + +class TObjectIterator { +public: + TObjectIterator(); + explicit TObjectIterator(const NUdf::TUnboxedValue& iterator); + explicit TObjectIterator(NUdf::TUnboxedValue&& iterator); + + explicit TObjectIterator(const NKikimr::NBinaryJson::TObjectIterator& iterator); + explicit TObjectIterator(NKikimr::NBinaryJson::TObjectIterator&& iterator); + + bool Next(TValue& key, TValue& value); + +private: + std::variant<TEmptyMarker, NUdf::TUnboxedValue, NKikimr::NBinaryJson::TObjectIterator> Iterator; +}; + +class 
TValue { +public: + TValue(); + explicit TValue(const NUdf::TUnboxedValue& value); + explicit TValue(NUdf::TUnboxedValue&& value); + + explicit TValue(const NKikimr::NBinaryJson::TEntryCursor& value); + explicit TValue(NKikimr::NBinaryJson::TEntryCursor&& value); + + explicit TValue(const NKikimr::NBinaryJson::TContainerCursor& value); + explicit TValue(NKikimr::NBinaryJson::TContainerCursor&& value); + + EValueType GetType() const; + bool Is(EValueType type) const; + bool IsBool() const; + bool IsNumber() const; + bool IsString() const; + bool IsNull() const; + bool IsObject() const; + bool IsArray() const; + + // Scalar value methods + double GetNumber() const; + bool GetBool() const; + const TStringBuf GetString() const; + + ui32 GetSize() const; + + // Array methods + TValue GetElement(ui32 index) const; + TArrayIterator GetArrayIterator() const; + + // Object methods + TMaybe<TValue> Lookup(const TStringBuf key) const; + TObjectIterator GetObjectIterator() const; + + NUdf::TUnboxedValue ConvertToUnboxedValue(const NUdf::IValueBuilder* valueBuilder) const; + +private: + void UnpackInnerValue(); + + std::variant<NUdf::TUnboxedValue, NKikimr::NBinaryJson::TEntryCursor, NKikimr::NBinaryJson::TContainerCursor> Value; +}; + +} diff --git a/ydb/library/yql/minikql/jsonpath/ya.make b/ydb/library/yql/minikql/jsonpath/ya.make index 6dab147b83..b5c7c6ee94 100644 --- a/ydb/library/yql/minikql/jsonpath/ya.make +++ b/ydb/library/yql/minikql/jsonpath/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - +LIBRARY() + OWNER( g:kikimr g:yql g:yql_ydb_core ) - -YQL_ABI_VERSION( - 2 - 18 - 0 -) - -PEERDIR( + +YQL_ABI_VERSION( + 2 + 18 + 0 +) + +PEERDIR( contrib/libs/double-conversion library/cpp/json library/cpp/regex/hyperscan @@ -24,21 +24,21 @@ PEERDIR( ydb/library/yql/core/issue/protos ydb/library/yql/parser/proto_ast ydb/library/yql/parser/proto_ast/gen/jsonpath -) - -SRCS( - ast_builder.cpp - ast_nodes.cpp - binary.cpp - executor.cpp - jsonpath.cpp - parse_double.cpp - type_check.cpp - 
value.cpp -) - -GENERATE_ENUM_SERIALIZATION(ast_nodes.h) - +) + +SRCS( + ast_builder.cpp + ast_nodes.cpp + binary.cpp + executor.cpp + jsonpath.cpp + parse_double.cpp + type_check.cpp + value.cpp +) + +GENERATE_ENUM_SERIALIZATION(ast_nodes.h) + END() RECURSE_FOR_TESTS( diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp index 47ae6e4c8f..46424eaa88 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.cpp +++ b/ydb/library/yql/minikql/mkql_program_builder.cpp @@ -1863,11 +1863,11 @@ TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Json>(const NUdf:: } template<> -TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::JsonDocument>(const NUdf::TStringRef& data) const { - return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TJsonDocument>::Id, Env), true); -} - -template<> +TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::JsonDocument>(const NUdf::TStringRef& data) const { + return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TJsonDocument>::Id, Env), true); +} + +template<> TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Uuid>(const NUdf::TStringRef& data) const { return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TUuid>::Id, Env), true); } @@ -1894,7 +1894,7 @@ TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Interval>(const NU template<> TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::DyNumber>(const NUdf::TStringRef& data) const { - return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDyNumber>::Id, Env), true); + return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDyNumber>::Id, Env), true); } TRuntimeNode TProgramBuilder::NewDecimalLiteral(NYql::NDecimal::TInt128 data, ui8 precision, ui8 scale) const { @@ -4753,7 +4753,7 @@ TRuntimeNode TProgramBuilder::Default(TType* type) { const auto scheme = targetType->GetSchemeType(); const auto value = scheme == 
NUdf::TDataType<NUdf::TUuid>::Id ? Env.NewStringValue("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"sv) : - scheme == NUdf::TDataType<NUdf::TDyNumber>::Id ? NUdf::TUnboxedValuePod::Embedded("\1") : NUdf::TUnboxedValuePod::Zero(); + scheme == NUdf::TDataType<NUdf::TDyNumber>::Id ? NUdf::TUnboxedValuePod::Embedded("\1") : NUdf::TUnboxedValuePod::Zero(); return TRuntimeNode(TDataLiteral::Create(value, targetType, Env), true); } diff --git a/ydb/library/yql/minikql/mkql_type_ops.cpp b/ydb/library/yql/minikql/mkql_type_ops.cpp index d559bb108c..bb31245b21 100644 --- a/ydb/library/yql/minikql/mkql_type_ops.cpp +++ b/ydb/library/yql/minikql/mkql_type_ops.cpp @@ -12,7 +12,7 @@ #include <ydb/library/binary_json/write.h> #include <ydb/library/binary_json/read.h> #include <ydb/library/dynumber/dynumber.h> - + #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/yson/parser.h> @@ -140,9 +140,9 @@ bool IsValidValue(NUdf::EDataSlot type, const NUdf::TUnboxedValuePod& value) { case NUdf::EDataSlot::Uuid: return bool(value) && value.AsStringRef().Size() == 16; case NUdf::EDataSlot::DyNumber: - return NDyNumber::IsValidDyNumber(value.AsStringRef()); - case NUdf::EDataSlot::JsonDocument: - return bool(value) && NKikimr::NBinaryJson::IsValidBinaryJson(value.AsStringRef()); + return NDyNumber::IsValidDyNumber(value.AsStringRef()); + case NUdf::EDataSlot::JsonDocument: + return bool(value) && NKikimr::NBinaryJson::IsValidBinaryJson(value.AsStringRef()); } MKQL_ENSURE(false, "Incorrect data slot: " << (ui32)type); } @@ -481,15 +481,15 @@ NUdf::TUnboxedValuePod ValueToString(NUdf::EDataSlot type, NUdf::TUnboxedValuePo } case NUdf::EDataSlot::DyNumber: { - out << NDyNumber::DyNumberToString(value.AsStringRef()); - break; - } - - case NUdf::EDataSlot::JsonDocument: { - out << NKikimr::NBinaryJson::SerializeToJson(value.AsStringRef()); + out << NDyNumber::DyNumberToString(value.AsStringRef()); break; } + case NUdf::EDataSlot::JsonDocument: { + out << 
NKikimr::NBinaryJson::SerializeToJson(value.AsStringRef()); + break; + } + case NUdf::EDataSlot::Decimal: default: THROW yexception() << "Incorrect data slot: " << (ui32)type; @@ -1612,13 +1612,13 @@ bool IsValidStringValue(NUdf::EDataSlot type, NUdf::TStringRef buf) { case NUdf::EDataSlot::Yson: return NDom::IsValidYson(buf); case NUdf::EDataSlot::Json: - case NUdf::EDataSlot::JsonDocument: + case NUdf::EDataSlot::JsonDocument: return NDom::IsValidJson(buf); case NUdf::EDataSlot::Uuid: return IsValidUuid(buf); case NUdf::EDataSlot::DyNumber: - return NDyNumber::IsValidDyNumberString(buf); + return NDyNumber::IsValidDyNumberString(buf); case NUdf::EDataSlot::Date: case NUdf::EDataSlot::Datetime: @@ -1726,24 +1726,24 @@ NUdf::TUnboxedValuePod ValueFromString(NUdf::EDataSlot type, NUdf::TStringRef bu case NUdf::EDataSlot::TzTimestamp: return ParseTzTimestamp(buf); - case NUdf::EDataSlot::DyNumber: { - auto dyNumber = NDyNumber::ParseDyNumberString(buf); - if (!dyNumber.Defined()) { - // DyNumber parse error happened, return NULL - return NUdf::TUnboxedValuePod(); - } - return MakeString(*dyNumber); - } - - case NUdf::EDataSlot::JsonDocument: { - auto binaryJson = NKikimr::NBinaryJson::SerializeToBinaryJson(buf); - if (!binaryJson.Defined()) { - // JSON parse error happened, return NULL - return NUdf::TUnboxedValuePod(); - } - return MakeString(TStringBuf(binaryJson->Data(), binaryJson->Size())); - } - + case NUdf::EDataSlot::DyNumber: { + auto dyNumber = NDyNumber::ParseDyNumberString(buf); + if (!dyNumber.Defined()) { + // DyNumber parse error happened, return NULL + return NUdf::TUnboxedValuePod(); + } + return MakeString(*dyNumber); + } + + case NUdf::EDataSlot::JsonDocument: { + auto binaryJson = NKikimr::NBinaryJson::SerializeToBinaryJson(buf); + if (!binaryJson.Defined()) { + // JSON parse error happened, return NULL + return NUdf::TUnboxedValuePod(); + } + return MakeString(TStringBuf(binaryJson->Data(), binaryJson->Size())); + } + case 
NUdf::EDataSlot::Decimal: default: break; @@ -1819,7 +1819,7 @@ NUdf::TUnboxedValuePod SimpleValueFromYson(NUdf::EDataSlot type, NUdf::TStringRe case NUdf::EDataSlot::Decimal: case NUdf::EDataSlot::Uuid: Y_FAIL("TODO"); - + default: ; } @@ -1928,7 +1928,7 @@ NUdf::TUnboxedValuePod SimpleValueFromYson(NUdf::EDataSlot type, NUdf::TStringRe case NUdf::EDataSlot::Decimal: case NUdf::EDataSlot::Uuid: case NUdf::EDataSlot::DyNumber: - case NUdf::EDataSlot::JsonDocument: + case NUdf::EDataSlot::JsonDocument: Y_FAIL("TODO"); } diff --git a/ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make b/ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make index deeb75f177..122e9ccc70 100644 --- a/ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make +++ b/ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make @@ -1,20 +1,20 @@ -PROTO_LIBRARY() - +PROTO_LIBRARY() + OWNER(g:yql g:kikimr g:yql_ydb_core) - + IF (CPP_PROTO) SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR}) SET(antlr_templates ${antlr_output}/org/antlr/codegen/templates) SET(jsonpath_grammar ${ARCADIA_ROOT}/ydb/library/yql/minikql/jsonpath/JsonPath.g) - + SET(ANTLR_PACKAGE_NAME NJsonPathGenerated) SET(PROTOBUF_HEADER_PATH ${MODDIR}) SET(LEXER_PARSER_NAMESPACE NALP) - + CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg) CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in ${antlr_templates}/protobuf/protobuf.stg) - + RUN_ANTLR( ${jsonpath_grammar} -lib . @@ -24,13 +24,13 @@ IF (CPP_PROTO) OUT_NOAUTO JsonPathParser.proto CWD ${antlr_output} ) - + EXCLUDE_TAGS(GO_PROTO JAVA_PROTO) - + NO_COMPILER_WARNINGS() - + INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl) - + RUN_ANTLR( ${jsonpath_grammar} -lib . 
@@ -43,8 +43,8 @@ IF (CPP_PROTO) CWD ${antlr_output} ) ENDIF() - + SRCS(JsonPathParser.proto) -END() +END() diff --git a/ydb/library/yql/parser/proto_ast/gen/v0/ya.make b/ydb/library/yql/parser/proto_ast/gen/v0/ya.make index 7aa382db01..0243fb1cee 100644 --- a/ydb/library/yql/parser/proto_ast/gen/v0/ya.make +++ b/ydb/library/yql/parser/proto_ast/gen/v0/ya.make @@ -5,7 +5,7 @@ OWNER(g:yql g:yql_ydb_core) EXCLUDE_TAGS(GO_PROTO JAVA_PROTO) IF (CPP_PROTO) - + SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR}) SET(antlr_templates ${antlr_output}/org/antlr/codegen/templates) SET(sql_grammar ${ARCADIA_ROOT}/ydb/library/yql/sql/v0/SQL.g) @@ -13,7 +13,7 @@ IF (CPP_PROTO) SET(ANTLR_PACKAGE_NAME NSQLGenerated) SET(PROTOBUF_HEADER_PATH ${MODDIR}) SET(LEXER_PARSER_NAMESPACE NALP) - + CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg) CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in ${antlr_templates}/protobuf/protobuf.stg) @@ -27,11 +27,11 @@ IF (CPP_PROTO) OUT_NOAUTO SQLParser.proto CWD ${antlr_output} ) - + NO_COMPILER_WARNINGS() - + INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl) - + RUN_ANTLR( ${sql_grammar} -lib . 
diff --git a/ydb/library/yql/parser/proto_ast/gen/v1/ya.make b/ydb/library/yql/parser/proto_ast/gen/v1/ya.make index 2a3c9560d0..86d3d63b6c 100644 --- a/ydb/library/yql/parser/proto_ast/gen/v1/ya.make +++ b/ydb/library/yql/parser/proto_ast/gen/v1/ya.make @@ -7,12 +7,12 @@ PEERDIR ( OWNER(g:yql g:yql_ydb_core) SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR}) -SET(antlr_templates ${antlr_output}/org/antlr/codegen/templates) +SET(antlr_templates ${antlr_output}/org/antlr/codegen/templates) SET(sql_grammar ${antlr_output}/SQLv1.g) -SET(ANTLR_PACKAGE_NAME NSQLv1Generated) +SET(ANTLR_PACKAGE_NAME NSQLv1Generated) SET(PROTOBUF_HEADER_PATH ydb/library/yql/parser/proto_ast/gen/v1_proto) - + SET(LEXER_PARSER_NAMESPACE NALPDefault) SET(GRAMMAR_STRING_CORE_SINGLE "\"~(QUOTE_SINGLE | BACKSLASH) | (BACKSLASH .)\"") @@ -21,21 +21,21 @@ SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"") CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg) CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1.g.in ${sql_grammar}) - -NO_COMPILER_WARNINGS() - + +NO_COMPILER_WARNINGS() + INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl) - -RUN_ANTLR( - ${sql_grammar} - -lib . - -fo ${antlr_output} - IN ${sql_grammar} ${antlr_templates}/Cpp/Cpp.stg - OUT SQLv1Parser.cpp SQLv1Lexer.cpp SQLv1Parser.h SQLv1Lexer.h - OUTPUT_INCLUDES + +RUN_ANTLR( + ${sql_grammar} + -lib . 
+ -fo ${antlr_output} + IN ${sql_grammar} ${antlr_templates}/Cpp/Cpp.stg + OUT SQLv1Parser.cpp SQLv1Lexer.cpp SQLv1Parser.h SQLv1Lexer.h + OUTPUT_INCLUDES ${PROTOBUF_HEADER_PATH}/SQLv1Parser.pb.h - ${STG_INCLUDES} - CWD ${antlr_output} -) - + ${STG_INCLUDES} + CWD ${antlr_output} +) + END() diff --git a/ydb/library/yql/parser/proto_ast/gen/ya.make b/ydb/library/yql/parser/proto_ast/gen/ya.make index 72c7ac4da7..dd8dd62689 100644 --- a/ydb/library/yql/parser/proto_ast/gen/ya.make +++ b/ydb/library/yql/parser/proto_ast/gen/ya.make @@ -2,5 +2,5 @@ RECURSE( v0 v1 v1_proto - jsonpath + jsonpath ) diff --git a/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in b/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in index e18b3ad7f6..95153201c1 100755 --- a/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in +++ b/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in @@ -1,364 +1,364 @@ -/* - [The "BSD license"] - Copyright (c) 2005-2009 Gokulakannan Somasundaram, - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * This code generating template and the associated Cpp runtime was produced by: - * Gokulakannan Somasundaram ( heavy lifting from C Run-time by Jim Idle ) - */ -cTypeInitMap ::= [ - "int" : "0", // Integers start out being 0 - "long" : "0", // Longs start out being 0 - "float" : "0.0", // Floats start out being 0 - "double" : "0.0", // Doubles start out being 0 - "bool" : "false", // Booleans start out being Antlr C for false - "byte" : "0", // Bytes start out being 0 - "short" : "0", // Shorts start out being 0 - "char" : "0" // Chars start out being 0 -] - -leadIn(type) ::= -<< -/** \file - * This <type> file was generated by $ANTLR version <ANTLRVersion> - * - * - From the grammar source file : <fileName> - * - On : <generatedTimestamp> -<if(LEXER)> - * - for the lexer : <name>Lexer -<endif> -<if(PARSER)> - * - for the parser : <name>Parser -<endif> -<if(TREE_PARSER)> - * - for the tree parser : <name>TreeParser -<endif> - * - * Editing it, at least manually, is not wise. - * - * C++ language generator and runtime by Gokulakannan Somasundaram ( heavy lifting from C Run-time by Jim Idle ) - * - * ->> - -/** The overall file structure of a recognizer; stores methods for rules - * and cyclic DFAs plus support code. 
- */ -outputFile( LEXER, - PARSER, - TREE_PARSER, - actionScope, - actions, - docComment, - recognizer, - name, - tokens, - tokenNames, - rules, - cyclicDFAs, - bitsets, - buildTemplate, - buildAST, - rewriteMode, - profile, - backtracking, - synpreds, - memoize, - numRules, - fileName, - ANTLRVersion, - generatedTimestamp, - trace, - scopes, - superClass, - literals - ) ::= -<< -<leadIn("C++ source")> -*/ -// [The "BSD license"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -/* ----------------------------------------- - * Include the ANTLR3 generated header file. - */ -<if(PARSER)> +/* + [The "BSD license"] + Copyright (c) 2005-2009 Gokulakannan Somasundaram, + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/* + * This code generating template and the associated Cpp runtime was produced by: + * Gokulakannan Somasundaram ( heavy lifting from C Run-time by Jim Idle ) + */ +cTypeInitMap ::= [ + "int" : "0", // Integers start out being 0 + "long" : "0", // Longs start out being 0 + "float" : "0.0", // Floats start out being 0 + "double" : "0.0", // Doubles start out being 0 + "bool" : "false", // Booleans start out being Antlr C for false + "byte" : "0", // Bytes start out being 0 + "short" : "0", // Shorts start out being 0 + "char" : "0" // Chars start out being 0 +] + +leadIn(type) ::= +<< +/** \file + * This <type> file was generated by $ANTLR version <ANTLRVersion> + * + * - From the grammar source file : <fileName> + * - On : <generatedTimestamp> +<if(LEXER)> + * - for the lexer : <name>Lexer +<endif> +<if(PARSER)> + * - for the parser : <name>Parser +<endif> +<if(TREE_PARSER)> + * - for the tree parser : <name>TreeParser +<endif> + * + * Editing it, at least manually, is not wise. + * + * C++ language generator and runtime by Gokulakannan Somasundaram ( heavy lifting from C Run-time by Jim Idle ) + * + * +>> + +/** The overall file structure of a recognizer; stores methods for rules + * and cyclic DFAs plus support code. + */ +outputFile( LEXER, + PARSER, + TREE_PARSER, + actionScope, + actions, + docComment, + recognizer, + name, + tokens, + tokenNames, + rules, + cyclicDFAs, + bitsets, + buildTemplate, + buildAST, + rewriteMode, + profile, + backtracking, + synpreds, + memoize, + numRules, + fileName, + ANTLRVersion, + generatedTimestamp, + trace, + scopes, + superClass, + literals + ) ::= +<< +<leadIn("C++ source")> +*/ +// [The "BSD license"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +/* ----------------------------------------- + * Include the ANTLR3 generated header file. 
+ */ +<if(PARSER)> #include \<@PROTOBUF_HEADER_PATH@/<name>.pb.h> -<endif> - -#include "<recognizer.grammar.name>Lexer.h" -#include "<recognizer.grammar.name>Parser.h" - -<if(trace)> -#include \<util/stream/output.h> -<endif> -<if(recognizer.grammar.delegators)> -// Include delegator definition header files -// -<recognizer.grammar.delegators: {g|#include "<g.recognizerName>.hpp" }; separator="\n"> -<endif> - +<endif> + +#include "<recognizer.grammar.name>Lexer.h" +#include "<recognizer.grammar.name>Parser.h" + +<if(trace)> +#include \<util/stream/output.h> +<endif> +<if(recognizer.grammar.delegators)> +// Include delegator definition header files +// +<recognizer.grammar.delegators: {g|#include "<g.recognizerName>.hpp" }; separator="\n"> +<endif> + namespace @LEXER_PARSER_NAMESPACE@ { -/* ----------------------------------------- */ - -<docComment> - -<if(literals)> - -<beginNamespace(actions)> - -/** String literals used by <name> that we must do things like MATCHS() with. - * C will normally just lay down 8 bit characters, and you can use L"xxx" to - * get wchar_t, but wchar_t is 16 bits on Windows, which is not UTF32 and so - * we perform this little trick of defining the literals as arrays of UINT32 - * and passing in the address of these. - */ -<literals:{it | static ANTLR_UCHAR lit_<i>[] = <it>;}; separator="\n"> - -<endNamespace(actions)> - -<endif> - -/* ============================================================================= */ - -/* ============================================================================= - * Start of recognizer - */ - -<recognizer> - +/* ----------------------------------------- */ + +<docComment> + +<if(literals)> + +<beginNamespace(actions)> + +/** String literals used by <name> that we must do things like MATCHS() with. 
+ * C will normally just lay down 8 bit characters, and you can use L"xxx" to + * get wchar_t, but wchar_t is 16 bits on Windows, which is not UTF32 and so + * we perform this little trick of defining the literals as arrays of UINT32 + * and passing in the address of these. + */ +<literals:{it | static ANTLR_UCHAR lit_<i>[] = <it>;}; separator="\n"> + +<endNamespace(actions)> + +<endif> + +/* ============================================================================= */ + +/* ============================================================================= + * Start of recognizer + */ + +<recognizer> + } // namespace @LEXER_PARSER_NAMESPACE@ -/* End of code - * ============================================================================= - */ - ->> -headerFileExtension() ::= ".h" - -beginNamespace(actions) ::= <% - <if(actions.(actionScope).namespace)> - <endif> -%> - -endNamespace(actions) ::= <% - <if(actions.(actionScope).namespace)> - <endif> -%> - - -headerFile( LEXER, - PARSER, - TREE_PARSER, - actionScope, - actions, - docComment, - recognizer, - name, - tokens, - tokenNames, - rules, - cyclicDFAs, - bitsets, - buildTemplate, - buildAST, - rewriteMode, - profile, - backtracking, - synpreds, - memoize, - numRules, - fileName, - ANTLRVersion, - generatedTimestamp, - trace, - scopes, - superClass, - literals - ) ::= -<< -#pragma once - -<leadIn("C++ header")> -<if(PARSER)> - * The parser <mainName()> has the callable functions (rules) shown below, -<endif> -<if(LEXER)> - * The lexer <mainName()> has the callable functions (rules) shown below, -<endif> -<if(TREE_PARSER)> - * The tree parser <mainName()> has the callable functions (rules) shown below, -<endif> - * which will invoke the code for the associated rule in the source grammar - * assuming that the input stream is pointing to a token/text stream that could begin - * this rule. 
- * - * For instance if you call the first (topmost) rule in a parser grammar, you will - * get the results of a full parse, but calling a rule half way through the grammar will - * allow you to pass part of a full token stream to the parser, such as for syntax checking - * in editors and so on. - * - */ - -// [The "BSD license"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -/* ============================================================================= - * Standard antlr3 C++ runtime definitions - */ -#include \<contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp> - -/* End of standard antlr 3 runtime definitions - * ============================================================================= - */ - -#include \<util/generic/ptr.h> -#include \<util/generic/string.h> -#include \<util/generic/vector.h> -#include \<util/stream/output.h> -#include \<util/stream/str.h> - +/* End of code + * ============================================================================= + */ + +>> +headerFileExtension() ::= ".h" + +beginNamespace(actions) ::= <% + <if(actions.(actionScope).namespace)> + <endif> +%> + +endNamespace(actions) ::= <% + <if(actions.(actionScope).namespace)> + <endif> +%> + + +headerFile( LEXER, + PARSER, + TREE_PARSER, + actionScope, + actions, + docComment, + recognizer, + name, + tokens, + tokenNames, + rules, + cyclicDFAs, + bitsets, + buildTemplate, + buildAST, + rewriteMode, + profile, + backtracking, + synpreds, + memoize, + numRules, + fileName, + ANTLRVersion, + generatedTimestamp, + trace, + scopes, + superClass, + literals + ) ::= +<< +#pragma once + +<leadIn("C++ header")> +<if(PARSER)> + * The parser <mainName()> has the callable functions (rules) shown below, +<endif> +<if(LEXER)> + * The lexer <mainName()> has the callable functions (rules) shown below, +<endif> +<if(TREE_PARSER)> + * The tree parser <mainName()> has the callable functions (rules) shown below, +<endif> + * which will invoke the code for the associated rule in the source grammar + * assuming that the input stream is pointing to a token/text stream that could begin + * this rule. 
+ * + * For instance if you call the first (topmost) rule in a parser grammar, you will + * get the results of a full parse, but calling a rule half way through the grammar will + * allow you to pass part of a full token stream to the parser, such as for syntax checking + * in editors and so on. + * + */ + +// [The "BSD license"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +/* ============================================================================= + * Standard antlr3 C++ runtime definitions + */ +#include \<contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp> + +/* End of standard antlr 3 runtime definitions + * ============================================================================= + */ + +#include \<util/generic/ptr.h> +#include \<util/generic/string.h> +#include \<util/generic/vector.h> +#include \<util/stream/output.h> +#include \<util/stream/str.h> + #include \<google/protobuf/message.h> - -<if(recognizer.grammar.delegates)> -// Include delegate definition header files -// -<recognizer.grammar.delegates: {g|#include "<g.recognizerName>.hpp"}; separator="\n"> - -<endif> - -#ifdef WIN32 -// Disable: Unreferenced parameter, - Rules with parameters that are not used -// constant conditional, - ANTLR realizes that a prediction is always true (synpred usually) -// initialized but unused variable - tree rewrite variables declared but not needed -// Unreferenced local variable - lexer rule declares but does not always use _type -// potentially unitialized variable used - retval always returned from a rule -// unreferenced local function has been removed - susually getTokenNames or freeScope, they can go without warnigns -// -// These are only really displayed at warning level /W4 but that is the code ideal I am aiming at -// and the codegen must generate some of these warnings by necessity, apart from 4100, which is -// usually generated when a parser rule is given a parameter that it does not use. Mostly though -// this is a matter of orthogonality hence I disable that one. 
-// -#pragma warning( disable : 4100 ) -#pragma warning( disable : 4101 ) -#pragma warning( disable : 4127 ) -#pragma warning( disable : 4189 ) -#pragma warning( disable : 4505 ) -#pragma warning( disable : 4701 ) -#endif -<if(backtracking)> - -/* ======================== - * BACKTRACKING IS ENABLED - * ======================== - */ -<endif> - -<beginNamespace(actions)> - -<if(recognizer.grammar.delegators)> -// Include delegator definition classes -// -<recognizer.grammar.delegators: {g|class <g.recognizerName>; }; separator="\n"> -<endif> - + +<if(recognizer.grammar.delegates)> +// Include delegate definition header files +// +<recognizer.grammar.delegates: {g|#include "<g.recognizerName>.hpp"}; separator="\n"> + +<endif> + +#ifdef WIN32 +// Disable: Unreferenced parameter, - Rules with parameters that are not used +// constant conditional, - ANTLR realizes that a prediction is always true (synpred usually) +// initialized but unused variable - tree rewrite variables declared but not needed +// Unreferenced local variable - lexer rule declares but does not always use _type +// potentially unitialized variable used - retval always returned from a rule +// unreferenced local function has been removed - susually getTokenNames or freeScope, they can go without warnigns +// +// These are only really displayed at warning level /W4 but that is the code ideal I am aiming at +// and the codegen must generate some of these warnings by necessity, apart from 4100, which is +// usually generated when a parser rule is given a parameter that it does not use. Mostly though +// this is a matter of orthogonality hence I disable that one. 
+// +#pragma warning( disable : 4100 ) +#pragma warning( disable : 4101 ) +#pragma warning( disable : 4127 ) +#pragma warning( disable : 4189 ) +#pragma warning( disable : 4505 ) +#pragma warning( disable : 4701 ) +#endif +<if(backtracking)> + +/* ======================== + * BACKTRACKING IS ENABLED + * ======================== + */ +<endif> + +<beginNamespace(actions)> + +<if(recognizer.grammar.delegators)> +// Include delegator definition classes +// +<recognizer.grammar.delegators: {g|class <g.recognizerName>; }; separator="\n"> +<endif> + #include \<ydb/library/yql/parser/proto_ast/proto_ast.h> - + namespace @LEXER_PARSER_NAMESPACE@ { -<if(LEXER)> -template \<class ImplTraits> -class <name>Traits : public antlr3::CustomTraitsBase\<ImplTraits> { -public: - typedef google::protobuf::Message* RuleReturnValueType; -}; - -class <recognizer.grammar.name>Lexer; -class <recognizer.grammar.name>Parser; - -typedef antlr3::Traits\<<recognizer.grammar.name>Lexer, <recognizer.grammar.name>Parser, <name>Traits> <name>ImplTraits; - -// Fix for windows -#ifdef TOKEN_QUERY -#undef TOKEN_QUERY -#endif - +<if(LEXER)> +template \<class ImplTraits> +class <name>Traits : public antlr3::CustomTraitsBase\<ImplTraits> { +public: + typedef google::protobuf::Message* RuleReturnValueType; +}; + +class <recognizer.grammar.name>Lexer; +class <recognizer.grammar.name>Parser; + +typedef antlr3::Traits\<<recognizer.grammar.name>Lexer, <recognizer.grammar.name>Parser, <name>Traits> <name>ImplTraits; + +// Fix for windows +#ifdef TOKEN_QUERY +#undef TOKEN_QUERY +#endif + namespace { inline bool IsBetween(ANTLR_UINT32 value, ANTLR_UINT32 lower, ANTLR_UINT32 upper) { @@ -367,2294 +367,2294 @@ inline bool IsBetween(ANTLR_UINT32 value, ANTLR_UINT32 lower, ANTLR_UINT32 upper } -class <name>Tokens { -public: - /** Symbolic definitions of all the tokens that the <grammarType()> will work with. 
- * - * Antlr will define EOF, but we can't use that as it it is too common in - * in C header files and that would be confusing. There is no way to filter this out at the moment - * so we just undef it here for now. That isn't the value we get back from C recognizers - * anyway. We are looking for ANTLR_TOKEN_EOF. - */ - enum Tokens - { - TOKEN_EOF = <name>ImplTraits::CommonTokenType::TOKEN_EOF, - <tokens:{it | TOKEN_<it.name> = <it.type>}; separator=",\n"> - }; -}; - -<endif> -<if(PARSER)> -#include "<recognizer.grammar.name>Lexer.h" - -typedef <recognizer.grammar.name>LexerImplTraits <name>ImplTraits; -typedef <recognizer.grammar.name>LexerTokens <name>Tokens; -<endif> - -<rules:{r | <if(r.ruleDescriptor.isSynPred)> struct <r.ruleDescriptor.name> {\}; <endif>}; separator="\n"> - -/** Context tracking structure for <mainName()> - */ -class <name> : public <componentBaseType()>, public <name>Tokens { -public: - typedef <name>ImplTraits ImplTraits; - typedef <name> ComponentType; - typedef ComponentType::StreamType StreamType; - typedef <componentBaseType()> BaseType; - typedef ImplTraits::RuleReturnValueType RuleReturnType; - typedef ImplTraits::RecognizerSharedStateType\<StreamType> RecognizerSharedStateType; - typedef StreamType InputType; - -<if(recognizer.filterMode)> - static const bool IsFiltered = true; -<else> - static const bool IsFiltered = false; -<endif> - - <scopes:{it | <if(it.isDynamicGlobalScope)><globalAttributeScopeDecl(it)><endif>}> - <rules:{r | <if(r.ruleDescriptor.ruleScope)><ruleAttributeScopeDecl(scope=r.ruleDescriptor.ruleScope)><endif>}> - -private: -<if(recognizer.grammar.delegates)> - <recognizer.grammar.delegates: - {g|<g.recognizerName>* m_<g:delegateName()>;}; separator="\n"> -<endif> -<if(recognizer.grammar.delegators)> - <recognizer.grammar.delegators: - {g|<g.recognizerName>* m_<g:delegateName()>;}; separator="\n"> -<endif> -<scopes:{it | <if(it.isDynamicGlobalScope)> - <globalAttributeScopeDef(it)> -<endif>}; separator="\n\n"> 
-<rules: {r |<if(r.ruleDescriptor.ruleScope)> - <ruleAttributeScopeDef(scope=r.ruleDescriptor.ruleScope)> -<endif>}> - bool Error = false; - NProtoAST::IErrorCollector* Errors = nullptr; - ui32 FollowDepth_ = 0; - google::protobuf::Arena* Arena = nullptr; +class <name>Tokens { +public: + /** Symbolic definitions of all the tokens that the <grammarType()> will work with. + * + * Antlr will define EOF, but we can't use that as it it is too common in + * in C header files and that would be confusing. There is no way to filter this out at the moment + * so we just undef it here for now. That isn't the value we get back from C recognizers + * anyway. We are looking for ANTLR_TOKEN_EOF. + */ + enum Tokens + { + TOKEN_EOF = <name>ImplTraits::CommonTokenType::TOKEN_EOF, + <tokens:{it | TOKEN_<it.name> = <it.type>}; separator=",\n"> + }; +}; + +<endif> +<if(PARSER)> +#include "<recognizer.grammar.name>Lexer.h" + +typedef <recognizer.grammar.name>LexerImplTraits <name>ImplTraits; +typedef <recognizer.grammar.name>LexerTokens <name>Tokens; +<endif> + +<rules:{r | <if(r.ruleDescriptor.isSynPred)> struct <r.ruleDescriptor.name> {\}; <endif>}; separator="\n"> + +/** Context tracking structure for <mainName()> + */ +class <name> : public <componentBaseType()>, public <name>Tokens { +public: + typedef <name>ImplTraits ImplTraits; + typedef <name> ComponentType; + typedef ComponentType::StreamType StreamType; + typedef <componentBaseType()> BaseType; + typedef ImplTraits::RuleReturnValueType RuleReturnType; + typedef ImplTraits::RecognizerSharedStateType\<StreamType> RecognizerSharedStateType; + typedef StreamType InputType; + +<if(recognizer.filterMode)> + static const bool IsFiltered = true; +<else> + static const bool IsFiltered = false; +<endif> + + <scopes:{it | <if(it.isDynamicGlobalScope)><globalAttributeScopeDecl(it)><endif>}> + <rules:{r | <if(r.ruleDescriptor.ruleScope)><ruleAttributeScopeDecl(scope=r.ruleDescriptor.ruleScope)><endif>}> + +private: 
+<if(recognizer.grammar.delegates)> + <recognizer.grammar.delegates: + {g|<g.recognizerName>* m_<g:delegateName()>;}; separator="\n"> +<endif> +<if(recognizer.grammar.delegators)> + <recognizer.grammar.delegators: + {g|<g.recognizerName>* m_<g:delegateName()>;}; separator="\n"> +<endif> +<scopes:{it | <if(it.isDynamicGlobalScope)> + <globalAttributeScopeDef(it)> +<endif>}; separator="\n\n"> +<rules: {r |<if(r.ruleDescriptor.ruleScope)> + <ruleAttributeScopeDef(scope=r.ruleDescriptor.ruleScope)> +<endif>}> + bool Error = false; + NProtoAST::IErrorCollector* Errors = nullptr; + ui32 FollowDepth_ = 0; + google::protobuf::Arena* Arena = nullptr; static inline bool IsHiddenToken(ANTLR_UINT32 token) { return token == TOKEN_WS || token == TOKEN_COMMENT; } - -public: - <name>(InputType* instream<recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena* arena); -private: - <name>(InputType* instream, RecognizerSharedStateType* state<recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>); -public: - - void init(InputType* instream <recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}> ); - -<if(LEXER)> - void ReportErrors(NProtoAST::IErrorCollector* errors) { - Errors = errors; - } - bool HasError() const { - return Error; - } -<if(recognizer.filterMode)> - void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); - bool alreadyParsedRule(ANTLR_MARKER ruleIndex); - <filteringNextToken()> -<endif> - <rules:{r | <if(!r.ruleDescriptor.isSynPred)><headerReturnType(ruleDescriptor=r.ruleDescriptor)> m<r.ruleDescriptor.name>( <r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> - <rules:{r | <if(r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> msynpred( antlr3::ClassForwarder\< <r.ruleDescriptor.name> > <r.ruleDescriptor.parameterScope:parameterScope()>); - void m<r.ruleDescriptor.name>_fragment 
(<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> -<endif> -<if(PARSER)> - RuleReturnType Parse(const <recognizer.grammar.name>Lexer& lexer, NProtoAST::IErrorCollector* errors = 0); - void followPush( const BitsetListType& follow ) { - BaseType::followPush(follow); - ++FollowDepth_; -#ifndef NDEBUG - if (FollowDepth_ > 500) { -#else - if (FollowDepth_ > 5000) { -#endif - throw yexception() \<\< "Too many nested tokens"; - } - } - - void followPop() { - BaseType::followPop(); - --FollowDepth_; - } - -<endif> - - void displayRecognitionError(ANTLR_UINT8** tokenNames, ExceptionBaseType* ex); - -<if(!LEXER)> - <rules:{r | <headerReturnScope(ruleDescriptor=r.ruleDescriptor)>}> - <rules:{r | <if(!r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> <r.ruleDescriptor.name> (<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> - <rules:{r | <if(r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> msynpred( antlr3::ClassForwarder\< <r.ruleDescriptor.name> > <r.ruleDescriptor.parameterScope:parameterScope()>); - void m<r.ruleDescriptor.name>_fragment (<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> -<! generate rule/method definitions for imported rules so they - appear to be defined in this recognizer. !> - // Delegated rules -<recognizer.grammar.delegatedRules:{ruleDescriptor| - <headerReturnType(ruleDescriptor)> <ruleDescriptor.name>(<ruleDescriptor.parameterScope:parameterScope()>);}; separator="\n"> -<endif> - - const char * getGrammarFileName(); - void reset(); - ~<name>(); - -}; - -// Function protoypes for the constructor functions that external translation units -// such as delegators and delegates may wish to call. 
-// -<if(!recognizer.grammar.grammarIsRoot)> -extern ANTLR_UINT8* <recognizer.grammar.composite.rootGrammar.recognizerName>TokenNames[]; -<endif> - - -/* End of token definitions for <name> - * ============================================================================= - */ - + +public: + <name>(InputType* instream<recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena* arena); +private: + <name>(InputType* instream, RecognizerSharedStateType* state<recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>); +public: + + void init(InputType* instream <recognizer.grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}> ); + +<if(LEXER)> + void ReportErrors(NProtoAST::IErrorCollector* errors) { + Errors = errors; + } + bool HasError() const { + return Error; + } +<if(recognizer.filterMode)> + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); + bool alreadyParsedRule(ANTLR_MARKER ruleIndex); + <filteringNextToken()> +<endif> + <rules:{r | <if(!r.ruleDescriptor.isSynPred)><headerReturnType(ruleDescriptor=r.ruleDescriptor)> m<r.ruleDescriptor.name>( <r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> + <rules:{r | <if(r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> msynpred( antlr3::ClassForwarder\< <r.ruleDescriptor.name> > <r.ruleDescriptor.parameterScope:parameterScope()>); + void m<r.ruleDescriptor.name>_fragment (<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> +<endif> +<if(PARSER)> + RuleReturnType Parse(const <recognizer.grammar.name>Lexer& lexer, NProtoAST::IErrorCollector* errors = 0); + void followPush( const BitsetListType& follow ) { + BaseType::followPush(follow); + ++FollowDepth_; +#ifndef NDEBUG + if (FollowDepth_ > 500) { +#else + if (FollowDepth_ > 5000) { +#endif + throw yexception() \<\< "Too many nested tokens"; + } + } + + void followPop() { + BaseType::followPop(); + 
--FollowDepth_; + } + +<endif> + + void displayRecognitionError(ANTLR_UINT8** tokenNames, ExceptionBaseType* ex); + +<if(!LEXER)> + <rules:{r | <headerReturnScope(ruleDescriptor=r.ruleDescriptor)>}> + <rules:{r | <if(!r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> <r.ruleDescriptor.name> (<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> + <rules:{r | <if(r.ruleDescriptor.isSynPred)> <headerReturnType(ruleDescriptor=r.ruleDescriptor)> msynpred( antlr3::ClassForwarder\< <r.ruleDescriptor.name> > <r.ruleDescriptor.parameterScope:parameterScope()>); + void m<r.ruleDescriptor.name>_fragment (<r.ruleDescriptor.parameterScope:parameterScope()>);<endif>}; separator="\n"> +<! generate rule/method definitions for imported rules so they + appear to be defined in this recognizer. !> + // Delegated rules +<recognizer.grammar.delegatedRules:{ruleDescriptor| + <headerReturnType(ruleDescriptor)> <ruleDescriptor.name>(<ruleDescriptor.parameterScope:parameterScope()>);}; separator="\n"> +<endif> + + const char * getGrammarFileName(); + void reset(); + ~<name>(); + +}; + +// Function protoypes for the constructor functions that external translation units +// such as delegators and delegates may wish to call. 
+// +<if(!recognizer.grammar.grammarIsRoot)> +extern ANTLR_UINT8* <recognizer.grammar.composite.rootGrammar.recognizerName>TokenNames[]; +<endif> + + +/* End of token definitions for <name> + * ============================================================================= + */ + } // namespace @LEXER_PARSER_NAMESPACE@ -<endNamespace(actions)> - -/* END - Note:Keep extra line feed to satisfy UNIX systems */ - ->> - -grammarType() ::= <% -<if(PARSER)> -parser -<endif> -<if(LEXER)> -lexer -<endif> -<if(TREE_PARSER)> -tree parser -<endif> -%> - -componentType() ::= << -<if(PARSER)> -<name>ImplTraits::ParserType -<endif> -<if(LEXER)> -<name>ImplTraits::LexerType -<endif> -<if(TREE_PARSER)> -<name>ImplTraits::TreeParserType -<endif> ->> - -componentBaseType() ::= <% -<if(PARSER)> -<name>ImplTraits::BaseParserType -<endif> -<if(LEXER)> -<name>ImplTraits::BaseLexerType -<endif> -<if(TREE_PARSER)> -<name>ImplTraits::BaseTreeParserType -<endif> -%> - -streamType() ::= << -<if(PARSER)> -<name>ImplTraits::ParserType::StreamType -<endif> -<if(LEXER)> -<name>ImplTraits::LexerType::StreamType -<endif> -<if(TREE_PARSER)> -<name>ImplTraits::TreeParserType::StreamType -<endif> ->> - - -mainName() ::= <% -<if(PARSER)> -<name> -<endif> -<if(LEXER)> -<name> -<endif> -<if(TREE_PARSER)> -<name> -<endif> -%> - -headerReturnScope(ruleDescriptor) ::= "<returnScope(scope=ruleDescriptor.returnScope)>" - -headerReturnType(ruleDescriptor) ::= <% -<if(LEXER)> -<if(!ruleDescriptor.isSynPred)> - void -<else> - <returnType()> -<endif> -<else> - <returnType()> -<endif> -%> - -// Produce the lexer output -// -lexer( grammar, - name, - tokens, - scopes, - rules, - numRules, - filterMode, - superClass, - labelType="ImplTraits::CommonTokenType*") ::= << - -using namespace antlr3; - -<beginNamespace(actions)> - -<if(filterMode)> - -/* Override the normal MEMOIZE and HAVEALREADYPARSED macros as this is a filtering - * lexer. 
In filter mode, the memoizing and backtracking are gated at BACKTRACKING > 1 rather - * than just BACKTRACKING. In some cases this might generate code akin to: - * if (BACKTRACKING) if (BACKTRACKING > 1) memoize. - */ -void <name>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) -{ - BaseType* base = this; - if ( this->get_backtracking()>1 ) - base->memoize( ruleIndex, ruleParseStart ); - -} - -bool <name>::alreadyParsedRule(ANTLR_MARKER ruleIndex) -{ - BaseType* base = this; - if ( this->get_backtracking() > 1 ) - return base->haveParsedRule(ruleIndex); - return false; -} - -<endif> - -/* ========================================================================= - * Lexer matching rules end. - * ========================================================================= - */ - -<scopes:{it |<if(it.isDynamicGlobalScope)><globalAttributeScope(it)><endif>}> - -<name>::~<name>() -{ -<if(memoize)> - RuleMemoType* rulememo = this->getRuleMemo(); - if(rulememo != NULL) - { - delete rulememo; - this->setRuleMemo(NULL); - } -<endif> -<if(grammar.directDelegates)> - // Free the lexers that we delegated to - // functions to. NULL the state so we only free it once. - // - <grammar.directDelegates: - {g| m_<g:delegateName()>->set_lexstate(NULL); - delete m_<g:delegateName()>; }; separator="\n"> -<endif> -} - -void -<name>::reset() -{ - this->get_rec()->reset(); -} - -/** \brief Name of the grammar file that generated this code - */ -static const char fileName[] = "<fileName>"; - -/** \brief Return the name of the grammar file that generated this code. 
- */ -const char* <name>::getGrammarFileName() -{ - return fileName; -} - -<displayRecognitionError(name)> - -/** \brief Create a new lexer called <name> - * - * \param[in] instream Pointer to an initialized input stream - * \return - * - Success p<name> initialized for the lex start - * - Fail NULL - */ -<name>::<name>(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena*) -:<name>ImplTraits::BaseLexerType(ANTLR_SIZE_HINT, instream, NULL) -{ - // See if we can create a new lexer with the standard constructor - // - this->init(instream <grammar.delegators:{g|, <g:delegateName()>}>); -} - -/** \brief Create a new lexer called <name> - * - * \param[in] instream Pointer to an initialized input stream - * \param[state] state Previously created shared recognizer stat - * \return - * - Success p<name> initialized for the lex start - * - Fail NULL - */ -<name>::<name>(StreamType* instream, RecognizerSharedStateType* state<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) -:<name>ImplTraits::BaseLexerType(ANTLR_SIZE_HINT, instream, state) -{ - this->init(instream <grammar.delegators:{g|, <g:delegateName()>} >); -} - -void <name>::init(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>} >) -{ - /* ------------------------------------------------------------------- - * Memory for basic structure is allocated, now to fill in - * in base ANTLR3 structures. We initialize the function pointers - * for the standard ANTLR3 lexer function set, but upon return - * from here, the programmer may set the pointers to provide custom - * implementations of each function. - * - * We don't use the macros defined in <name>.h here so you can get a sense - * of what goes where. - */ - -<if(memoize)> -<if(grammar.grammarIsRoot)> - // Create a LIST for recording rule memos. - // - this->setRuleMemo( new IntTrie(15) ); /* 16 bit depth is enough for 32768 rules! 
*/ -<endif> -<endif> - -<if(grammar.directDelegates)> - // Initialize the lexers that we are going to delegate some - // functions to. - // - <grammar.directDelegates: - {g|m_<g:delegateName()> = new <g.recognizerName>(instream, this->get_lexstate(), this<grammar.delegators:{g|, <g:delegateName()>}>);}; separator="\n"> -<else> - Y_UNUSED(instream); -<endif> -<if(grammar.delegators)> - // Install the pointers back to lexers that will delegate us to perform certain functions - // for them. - // - <grammar.delegators: - {g| m_<g:delegateName()> = <g:delegateName()>;}; separator="\n"> -<endif> -} - -<if(cyclicDFAs)> - -/* ========================================================================= - * DFA tables for the lexer - */ -<cyclicDFAs:cyclicDFA()><! dump tables for all DFA !> -/* ========================================================================= - * End of DFA tables for the lexer - */ -<endif> - -/* ========================================================================= - * Functions to match the lexer grammar defined tokens from the input stream - */ - -<rules; separator="\n\n"> - -/* ========================================================================= - * Lexer matching rules end. - * ========================================================================= - */ -<if(synpreds)> - -/* ========================================================================= - * Lexer syntactic predicates - */ -<synpreds:{p | <lexerSynpred(predname=p)>}> -/* ========================================================================= - * Lexer syntactic predicates end. 
- * ========================================================================= - */ -<endif> - -/* End of Lexer code - * ================================================ - * ================================================ - */ - -<endNamespace(actions)> - ->> - - -filteringNextToken() ::= << -<name>ImplTraits::CommonTokenType* -<name>ImplTraits::TokenSourceType::nextToken() -{ - LexerType* lexer; - typename LexerType::RecognizerSharedStateType* state; - - lexer = this->get_super(); - state = lexer->get_lexstate(); - - /* Get rid of any previous token (token factory takes care of - * any deallocation when this token is finally used up. - */ - state->set_token_present( false ); - state->set_error( false ); /* Start out without an exception */ - state->set_failedflag(false); - - /* Record the start of the token in our input stream. - */ - state->set_tokenStartCharIndex( lexer->index(); - state->set_tokenStartCharPositionInLine( lexer->getCharPositionInLine() ); - state->set_tokenStartLine( lexer->getLine() ); - state->set_text(""); - - /* Now call the matching rules and see if we can generate a new token - */ - for (;;) - { - if (lexer->LA(1) == ANTLR_CHARSTREAM_EOF) - { - /* Reached the end of the stream, nothing more to do. - */ - CommonTokenType& teof = m_eofToken; - - teof.set_startIndex(lexer->getCharIndex()); - teof.set_stopIndex(lexer->getCharIndex()); - teof.setLine(lexer->getLine()); - return &teof; - } - - state->set_token_present(false); - state->set_error(false); /* Start out without an exception */ - - { - ANTLR_MARKER m; - - m = this->get_istream()->mark(); - state->set_backtracking(1); /* No exceptions */ - state->set_failedflag(false); - - /* Call the generated lexer, see if it can get a new token together. - */ - lexer->mTokens(); - state->set_backtracking(0); - - <! mTokens backtracks with synpred at BACKTRACKING==2 - and we set the synpredgate to allow actions at level 1. !> - - if(state->get_failed()) - { - lexer->rewind(m); - lexer->consume(); <! 
advance one char and try again !> - } - else - { - lexer->emit(); /* Assemble the token and emit it to the stream */ - TokenType& tok = state->get_token(); - return &tok; - } - } - } -} ->> - -actionGate() ::= "this->get_backtracking()==0" - -filteringActionGate() ::= "this->get_backtracking()==1" - -/** How to generate a parser */ -genericParser( grammar, name, scopes, tokens, tokenNames, rules, numRules, - bitsets, inputStreamType, superClass, - labelType, members, rewriteElementType, - filterMode, ASTLabelType="ImplTraits::TreeType*") ::= << - -using namespace antlr3; -<if(grammar.grammarIsRoot)> -/** \brief Table of all token names in symbolic order, mainly used for - * error reporting. - */ -ANTLR_UINT8* <name>TokenNames[<length(tokenNames)>+4] - = { - (ANTLR_UINT8*) "nothing", /* String to print to indicate an invalid token */ - (ANTLR_UINT8*) "\<EOR>", - (ANTLR_UINT8*) "\<DOWN>", - (ANTLR_UINT8*) "\<UP>", - <tokenNames:{it |(ANTLR_UINT8*) <it>}; separator=",\n"> - }; -<endif> - -/** \brief Name of the grammar file that generated this code - */ -static const char fileName[] = "<fileName>"; - -/** \brief Return the name of the grammar file that generated this code. - */ -const char* <name>::getGrammarFileName() -{ - return fileName; -} -/** \brief Create a new <name> parser and return a context for it. - * - * \param[in] instream Pointer to an input stream interface. - * - * \return Pointer to new parser context upon success. - */ -<name>::<name>( StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena* arena) -<constructorInitializerType("NULL")> - , Arena(arena) -{ - // See if we can create a new parser with the standard constructor - // - this->init(instream<grammar.delegators:{g|, <g:delegateName()>}>); -} - -/** \brief Create a new <name> parser and return a context for it. - * - * \param[in] instream Pointer to an input stream interface. - * - * \return Pointer to new parser context upon success. 
- */ -<name>::<name>( StreamType* instream, RecognizerSharedStateType* state<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) -<constructorInitializerType("state")> -{ - this->init(instream <grammar.delegators:{g|, <g:delegateName()>}>); -} - -void <name>::init(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) -{ -<if(memoize)> -<if(grammar.grammarIsRoot)> - /* Create a LIST for recording rule memos. - */ - typedef RecognizerSharedStateType::RuleMemoType RuleMemoType; - this->setRuleMemo( new RuleMemoType(15) ); /* 16 bit depth is enough for 32768 rules! */<\n> -<endif> -<endif> -<if(grammar.directDelegates)> - // Initialize the lexers that we are going to delegate some - // functions to. - // - <grammar.directDelegates: - {g|m_<g:delegateName()> = new <g.recognizerName>(instream, this->get_psrstate(), this<grammar.delegators:{g|, <g:delegateName()>}>);}; separator="\n"> - <endif> - <if(grammar.delegators)> - // Install the pointers back to lexers that will delegate us to perform certain functions - // for them. - // - <grammar.delegators: {g| m_<g:delegateName()> = <g:delegateName()>;}; separator="\n"> - <endif> - /* Install the token table - */ - this->get_psrstate()->set_tokenNames( <grammar.composite.rootGrammar.recognizerName>TokenNames ); - - <@debugStuff()> - -} - -void -<name>::reset() -{ - this->get_rec()->reset(); -} - -/** Free the parser resources - */ -<name>::~<name>() - { - <@cleanup> - <@end> -<if(grammar.directDelegates)> - // Free the parsers that we delegated to - // functions to.NULL the state so we only free it once. 
- // - <grammar.directDelegates: - {g| m_<g:delegateName()>->set_psrstate( NULL ); - delete m_<g:delegateName()>;}; separator="\n"> -<endif> -<if(memoize)> -<if(grammar.grammarIsRoot)> - if(this->getRuleMemo() != NULL) - { - delete this->getRuleMemo(); - this->setRuleMemo(NULL); - } -<endif> -<endif> -} - -<if(PARSER)> -<name>::RuleReturnType <name>::Parse(const <recognizer.grammar.name>Lexer& lexer, NProtoAST::IErrorCollector* errors) { - <first(rules):{r | -Errors = errors; -RuleReturnType root(<name>::<r.ruleDescriptor.name>()); -if (!lexer.HasError() && !Error && root) { +<endNamespace(actions)> + +/* END - Note:Keep extra line feed to satisfy UNIX systems */ + +>> + +grammarType() ::= <% +<if(PARSER)> +parser +<endif> +<if(LEXER)> +lexer +<endif> +<if(TREE_PARSER)> +tree parser +<endif> +%> + +componentType() ::= << +<if(PARSER)> +<name>ImplTraits::ParserType +<endif> +<if(LEXER)> +<name>ImplTraits::LexerType +<endif> +<if(TREE_PARSER)> +<name>ImplTraits::TreeParserType +<endif> +>> + +componentBaseType() ::= <% +<if(PARSER)> +<name>ImplTraits::BaseParserType +<endif> +<if(LEXER)> +<name>ImplTraits::BaseLexerType +<endif> +<if(TREE_PARSER)> +<name>ImplTraits::BaseTreeParserType +<endif> +%> + +streamType() ::= << +<if(PARSER)> +<name>ImplTraits::ParserType::StreamType +<endif> +<if(LEXER)> +<name>ImplTraits::LexerType::StreamType +<endif> +<if(TREE_PARSER)> +<name>ImplTraits::TreeParserType::StreamType +<endif> +>> + + +mainName() ::= <% +<if(PARSER)> +<name> +<endif> +<if(LEXER)> +<name> +<endif> +<if(TREE_PARSER)> +<name> +<endif> +%> + +headerReturnScope(ruleDescriptor) ::= "<returnScope(scope=ruleDescriptor.returnScope)>" + +headerReturnType(ruleDescriptor) ::= <% +<if(LEXER)> +<if(!ruleDescriptor.isSynPred)> + void +<else> + <returnType()> +<endif> +<else> + <returnType()> +<endif> +%> + +// Produce the lexer output +// +lexer( grammar, + name, + tokens, + scopes, + rules, + numRules, + filterMode, + superClass, + labelType="ImplTraits::CommonTokenType*") 
::= << + +using namespace antlr3; + +<beginNamespace(actions)> + +<if(filterMode)> + +/* Override the normal MEMOIZE and HAVEALREADYPARSED macros as this is a filtering + * lexer. In filter mode, the memoizing and backtracking are gated at BACKTRACKING > 1 rather + * than just BACKTRACKING. In some cases this might generate code akin to: + * if (BACKTRACKING) if (BACKTRACKING > 1) memoize. + */ +void <name>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + BaseType* base = this; + if ( this->get_backtracking()>1 ) + base->memoize( ruleIndex, ruleParseStart ); + +} + +bool <name>::alreadyParsedRule(ANTLR_MARKER ruleIndex) +{ + BaseType* base = this; + if ( this->get_backtracking() > 1 ) + return base->haveParsedRule(ruleIndex); + return false; +} + +<endif> + +/* ========================================================================= + * Lexer matching rules end. + * ========================================================================= + */ + +<scopes:{it |<if(it.isDynamicGlobalScope)><globalAttributeScope(it)><endif>}> + +<name>::~<name>() +{ +<if(memoize)> + RuleMemoType* rulememo = this->getRuleMemo(); + if(rulememo != NULL) + { + delete rulememo; + this->setRuleMemo(NULL); + } +<endif> +<if(grammar.directDelegates)> + // Free the lexers that we delegated to + // functions to. NULL the state so we only free it once. + // + <grammar.directDelegates: + {g| m_<g:delegateName()>->set_lexstate(NULL); + delete m_<g:delegateName()>; }; separator="\n"> +<endif> +} + +void +<name>::reset() +{ + this->get_rec()->reset(); +} + +/** \brief Name of the grammar file that generated this code + */ +static const char fileName[] = "<fileName>"; + +/** \brief Return the name of the grammar file that generated this code. 
+ */ +const char* <name>::getGrammarFileName() +{ + return fileName; +} + +<displayRecognitionError(name)> + +/** \brief Create a new lexer called <name> + * + * \param[in] instream Pointer to an initialized input stream + * \return + * - Success p<name> initialized for the lex start + * - Fail NULL + */ +<name>::<name>(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena*) +:<name>ImplTraits::BaseLexerType(ANTLR_SIZE_HINT, instream, NULL) +{ + // See if we can create a new lexer with the standard constructor + // + this->init(instream <grammar.delegators:{g|, <g:delegateName()>}>); +} + +/** \brief Create a new lexer called <name> + * + * \param[in] instream Pointer to an initialized input stream + * \param[state] state Previously created shared recognizer stat + * \return + * - Success p<name> initialized for the lex start + * - Fail NULL + */ +<name>::<name>(StreamType* instream, RecognizerSharedStateType* state<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) +:<name>ImplTraits::BaseLexerType(ANTLR_SIZE_HINT, instream, state) +{ + this->init(instream <grammar.delegators:{g|, <g:delegateName()>} >); +} + +void <name>::init(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>} >) +{ + /* ------------------------------------------------------------------- + * Memory for basic structure is allocated, now to fill in + * in base ANTLR3 structures. We initialize the function pointers + * for the standard ANTLR3 lexer function set, but upon return + * from here, the programmer may set the pointers to provide custom + * implementations of each function. + * + * We don't use the macros defined in <name>.h here so you can get a sense + * of what goes where. + */ + +<if(memoize)> +<if(grammar.grammarIsRoot)> + // Create a LIST for recording rule memos. + // + this->setRuleMemo( new IntTrie(15) ); /* 16 bit depth is enough for 32768 rules! 
*/ +<endif> +<endif> + +<if(grammar.directDelegates)> + // Initialize the lexers that we are going to delegate some + // functions to. + // + <grammar.directDelegates: + {g|m_<g:delegateName()> = new <g.recognizerName>(instream, this->get_lexstate(), this<grammar.delegators:{g|, <g:delegateName()>}>);}; separator="\n"> +<else> + Y_UNUSED(instream); +<endif> +<if(grammar.delegators)> + // Install the pointers back to lexers that will delegate us to perform certain functions + // for them. + // + <grammar.delegators: + {g| m_<g:delegateName()> = <g:delegateName()>;}; separator="\n"> +<endif> +} + +<if(cyclicDFAs)> + +/* ========================================================================= + * DFA tables for the lexer + */ +<cyclicDFAs:cyclicDFA()><! dump tables for all DFA !> +/* ========================================================================= + * End of DFA tables for the lexer + */ +<endif> + +/* ========================================================================= + * Functions to match the lexer grammar defined tokens from the input stream + */ + +<rules; separator="\n\n"> + +/* ========================================================================= + * Lexer matching rules end. + * ========================================================================= + */ +<if(synpreds)> + +/* ========================================================================= + * Lexer syntactic predicates + */ +<synpreds:{p | <lexerSynpred(predname=p)>}> +/* ========================================================================= + * Lexer syntactic predicates end. 
+ * ========================================================================= + */ +<endif> + +/* End of Lexer code + * ================================================ + * ================================================ + */ + +<endNamespace(actions)> + +>> + + +filteringNextToken() ::= << +<name>ImplTraits::CommonTokenType* +<name>ImplTraits::TokenSourceType::nextToken() +{ + LexerType* lexer; + typename LexerType::RecognizerSharedStateType* state; + + lexer = this->get_super(); + state = lexer->get_lexstate(); + + /* Get rid of any previous token (token factory takes care of + * any deallocation when this token is finally used up. + */ + state->set_token_present( false ); + state->set_error( false ); /* Start out without an exception */ + state->set_failedflag(false); + + /* Record the start of the token in our input stream. + */ + state->set_tokenStartCharIndex( lexer->index(); + state->set_tokenStartCharPositionInLine( lexer->getCharPositionInLine() ); + state->set_tokenStartLine( lexer->getLine() ); + state->set_text(""); + + /* Now call the matching rules and see if we can generate a new token + */ + for (;;) + { + if (lexer->LA(1) == ANTLR_CHARSTREAM_EOF) + { + /* Reached the end of the stream, nothing more to do. + */ + CommonTokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.setLine(lexer->getLine()); + return &teof; + } + + state->set_token_present(false); + state->set_error(false); /* Start out without an exception */ + + { + ANTLR_MARKER m; + + m = this->get_istream()->mark(); + state->set_backtracking(1); /* No exceptions */ + state->set_failedflag(false); + + /* Call the generated lexer, see if it can get a new token together. + */ + lexer->mTokens(); + state->set_backtracking(0); + + <! mTokens backtracks with synpred at BACKTRACKING==2 + and we set the synpredgate to allow actions at level 1. !> + + if(state->get_failed()) + { + lexer->rewind(m); + lexer->consume(); <! 
advance one char and try again !> + } + else + { + lexer->emit(); /* Assemble the token and emit it to the stream */ + TokenType& tok = state->get_token(); + return &tok; + } + } + } +} +>> + +actionGate() ::= "this->get_backtracking()==0" + +filteringActionGate() ::= "this->get_backtracking()==1" + +/** How to generate a parser */ +genericParser( grammar, name, scopes, tokens, tokenNames, rules, numRules, + bitsets, inputStreamType, superClass, + labelType, members, rewriteElementType, + filterMode, ASTLabelType="ImplTraits::TreeType*") ::= << + +using namespace antlr3; +<if(grammar.grammarIsRoot)> +/** \brief Table of all token names in symbolic order, mainly used for + * error reporting. + */ +ANTLR_UINT8* <name>TokenNames[<length(tokenNames)>+4] + = { + (ANTLR_UINT8*) "nothing", /* String to print to indicate an invalid token */ + (ANTLR_UINT8*) "\<EOR>", + (ANTLR_UINT8*) "\<DOWN>", + (ANTLR_UINT8*) "\<UP>", + <tokenNames:{it |(ANTLR_UINT8*) <it>}; separator=",\n"> + }; +<endif> + +/** \brief Name of the grammar file that generated this code + */ +static const char fileName[] = "<fileName>"; + +/** \brief Return the name of the grammar file that generated this code. + */ +const char* <name>::getGrammarFileName() +{ + return fileName; +} +/** \brief Create a new <name> parser and return a context for it. + * + * \param[in] instream Pointer to an input stream interface. + * + * \return Pointer to new parser context upon success. + */ +<name>::<name>( StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>, google::protobuf::Arena* arena) +<constructorInitializerType("NULL")> + , Arena(arena) +{ + // See if we can create a new parser with the standard constructor + // + this->init(instream<grammar.delegators:{g|, <g:delegateName()>}>); +} + +/** \brief Create a new <name> parser and return a context for it. + * + * \param[in] instream Pointer to an input stream interface. + * + * \return Pointer to new parser context upon success. 
+ */ +<name>::<name>( StreamType* instream, RecognizerSharedStateType* state<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) +<constructorInitializerType("state")> +{ + this->init(instream <grammar.delegators:{g|, <g:delegateName()>}>); +} + +void <name>::init(StreamType* instream<grammar.delegators:{g|, <g.recognizerName>* <g:delegateName()>}>) +{ +<if(memoize)> +<if(grammar.grammarIsRoot)> + /* Create a LIST for recording rule memos. + */ + typedef RecognizerSharedStateType::RuleMemoType RuleMemoType; + this->setRuleMemo( new RuleMemoType(15) ); /* 16 bit depth is enough for 32768 rules! */<\n> +<endif> +<endif> +<if(grammar.directDelegates)> + // Initialize the lexers that we are going to delegate some + // functions to. + // + <grammar.directDelegates: + {g|m_<g:delegateName()> = new <g.recognizerName>(instream, this->get_psrstate(), this<grammar.delegators:{g|, <g:delegateName()>}>);}; separator="\n"> + <endif> + <if(grammar.delegators)> + // Install the pointers back to lexers that will delegate us to perform certain functions + // for them. + // + <grammar.delegators: {g| m_<g:delegateName()> = <g:delegateName()>;}; separator="\n"> + <endif> + /* Install the token table + */ + this->get_psrstate()->set_tokenNames( <grammar.composite.rootGrammar.recognizerName>TokenNames ); + + <@debugStuff()> + +} + +void +<name>::reset() +{ + this->get_rec()->reset(); +} + +/** Free the parser resources + */ +<name>::~<name>() + { + <@cleanup> + <@end> +<if(grammar.directDelegates)> + // Free the parsers that we delegated to + // functions to.NULL the state so we only free it once. 
+ // + <grammar.directDelegates: + {g| m_<g:delegateName()>->set_psrstate( NULL ); + delete m_<g:delegateName()>;}; separator="\n"> +<endif> +<if(memoize)> +<if(grammar.grammarIsRoot)> + if(this->getRuleMemo() != NULL) + { + delete this->getRuleMemo(); + this->setRuleMemo(NULL); + } +<endif> +<endif> +} + +<if(PARSER)> +<name>::RuleReturnType <name>::Parse(const <recognizer.grammar.name>Lexer& lexer, NProtoAST::IErrorCollector* errors) { + <first(rules):{r | +Errors = errors; +RuleReturnType root(<name>::<r.ruleDescriptor.name>()); +if (!lexer.HasError() && !Error && root) { auto ast = google::protobuf::Arena::CreateMessage\<@ANTLR_PACKAGE_NAME@::T<name>AST>(Arena); - Y_ASSERT(dynamic_cast\<@ANTLR_PACKAGE_NAME@::TRule_<r.ruleDescriptor.name>* >(root)); - ast->unsafe_arena_set_allocated_rule_<r.ruleDescriptor.name>(static_cast\<@ANTLR_PACKAGE_NAME@::TRule_<r.ruleDescriptor.name>* >(root)); - return ast; -\} -return nullptr; -}; separator=""> -} - -<displayRecognitionError(name)> - -<endif> - -/** Return token names used by this <grammarType()> - * - * The returned pointer is used as an index into the token names table (using the token - * number as the index). - * - * \return Pointer to first char * in the table. - */ -<if(LEXER)> -static ANTLR_UINT8** getTokenNames() -{ - return <grammar.composite.rootGrammar.recognizerName>TokenNames; -} -<endif> - -/* Declare the bitsets - */ -<bitsets:{it | <bitsetDeclare(bitsetname={FOLLOW_<it.name>_in_<it.inName><it.tokenIndex>}, - words64=it.bits, traits={<name>ImplTraits} )>}> - - -<if(cyclicDFAs)> - -/* ========================================================================= - * DFA tables for the parser - */ -<cyclicDFAs:cyclicDFA()> <! 
dump tables for all DFA !> -/* ========================================================================= - * End of DFA tables for the parser - */ -<endif> - -/* ============================================== - * Parsing rules - */ -<rules; separator="\n\n"> -<if(grammar.delegatedRules)> - // Delegated methods that appear to be a part of this - // parser - // -<grammar.delegatedRules:{ruleDescriptor| - <returnType()> <name>::<ruleDescriptor.name>(<ruleDescriptor.parameterScope:parameterScope()>) - { - <if(ruleDescriptor.hasReturnValue)>return <endif>m_<ruleDescriptor.grammar:delegateName()>-><ruleDescriptor.name>(<if(ruleDescriptor.parameterScope)><ruleDescriptor.parameterScope.attributes:{a|<a.name>}; separator=", "><endif>); - \}}; separator="\n"> - -<endif> -/* End of parsing rules - * ============================================== - */ - -/* ============================================== - * Syntactic predicates - */ -<synpreds:{p | <synpred(predname=p)>}> -/* End of syntactic predicates - * ============================================== - */ - ->> - -displayRecognitionError(name) ::=<< -void <name>::displayRecognitionError(ANTLR_UINT8** tokenNames, ExceptionBaseType* ex) { - Error = true; - if (!Errors) { - return; - } - if (ex) { - TStringStream err; - err \<\< ex->get_message(); - -<if(PARSER)> - NProtoAST::InvalidToken(err, ex->get_token()); -<else> - if (ex->get_c() && ex->get_c() != ImplTraits::CommonTokenType::TOKEN_EOF) { - NProtoAST::InvalidCharacter(err, ex->get_input()->get_super()); - } -<endif> - - ImplTraits::StringStreamType errtext; - ex->displayRecognitionError(tokenNames, errtext); - err \<\< errtext.str(); - Errors->Error(ex->get_line(), ex->get_charPositionInLine(), err.Str()); - } else { - Errors->Error(0, 0, "Unknown error"); - } -} ->> - -constructorInitializerType(rec_state) ::=<< -<if(PARSER)> - : ImplTraits::BaseParserType(ANTLR_SIZE_HINT, instream, <rec_state>) -<endif> -<if(TREE_PARSER)> - : 
ImplTraits::BaseTreeParserType(ANTLR_SIZE_HINT, instream, <rec_state>) -<endif> ->> - -parser( grammar, - name, - scopes, - tokens, - tokenNames, - rules, - numRules, - bitsets, - ASTLabelType, - superClass="Parser", - labelType="ImplTraits::CommonTokenType*", - members={<actions.parser.members>} - ) ::= << -<beginNamespace(actions)> -<genericParser(inputStreamType="CommonTokenStreamType*", rewriteElementType="Token", filterMode=false, ...)> -<endNamespace(actions)> ->> - -/** How to generate a tree parser; same as parser except the input - * stream is a different type. - */ -treeParser( grammar, - name, - scopes, - tokens, - tokenNames, - globalAction, - rules, - numRules, - bitsets, - filterMode, - labelType={<ASTLabelType>}, - ASTLabelType="ImplTraits::TreeType*", - superClass="TreeParser", - members={<actions.treeparser.members>} - ) ::= << -<beginNamespace(actions)> -<genericParser(inputStreamType="CommonTreeNodeStream*", rewriteElementType="Node", ...)> -<endNamespace(actions)> ->> - -/** A simpler version of a rule template that is specific to the imaginary - * rules created for syntactic predicates. As they never have return values - * nor parameters etc..., just give simplest possible method. Don't do - * any of the normal memoization stuff in here either; it's a waste. - * As predicates cannot be inlined into the invoking rule, they need to - * be in a rule by themselves. 
- */ -synpredRule(ruleName, ruleDescriptor, block, description, nakedBlock) ::= -<< -// $ANTLR start <ruleName> -void <name>::m<ruleName>_fragment( <ruleDescriptor.parameterScope:parameterScope()> ) -{ - <ruleDeclarations()> - <ruleLabelDefs()> - <ruleLabelInitializations()> -<if(trace)> - ANTLR_PRINTF("enter <ruleName> %d failed = %d, backtracking = %d\\n", this->LT(1),failed,this->get_backtracking() ); - <block> - ANTLR_PRINTF("exit <ruleName> %d, failed = %d, backtracking = %d\\n", this->LT(1),failed,this->get_backtracking()); - -<else> - <block> -<endif> - -goto rule<ruleDescriptor.name>Ex; /* Prevent compiler warnings */ -rule<ruleDescriptor.name>Ex: ; -} -// $ANTLR end <ruleName> ->> - -synpred(predname) ::= << - -bool <name>::msynpred( antlr3::ClassForwarder\< <predname> > ) -{ - ANTLR_MARKER start; - bool success; - - this->inc_backtracking(); - <@start()> - start = this->mark(); - this->m<predname>_fragment(); // can never throw exception - success = !( this->get_failedflag() ); - this->rewind(start); - <@stop()> - this->dec_backtracking(); - this->set_failedflag(false); - return success; -}<\n> ->> - -lexerSynpred(predname) ::= << -<synpred(predname)> ->> - -ruleMemoization(rname) ::= << -<if(memoize)> -if ( (this->get_backtracking()>0) && (this->haveParsedRule(<ruleDescriptor.index>)) ) -{ - <finalCode(finalBlock=finally)> -<if(!ruleDescriptor.isSynPred)> - <scopeClean()><\n> -<endif> - return <ruleReturnValue()>; -} -<endif> ->> - -/** How to test for failure and return from rule */ -checkRuleBacktrackFailure() ::= << -if (this->hasException()) -{ - goto rule<ruleDescriptor.name>Ex; -} -<if(backtracking)> -if (this->hasFailed()) -{ - <scopeClean()> - <@debugClean()> - return <ruleReturnValue()>; -} -<endif> ->> - -/** This rule has failed, exit indicating failure during backtrack */ -ruleBacktrackFailure() ::= << -<if(backtracking)> -if (this->get_backtracking()>0) -{ - this->set_failedflag( true ); - <scopeClean()> - return <ruleReturnValue()>; -} 
-<endif> ->> - -/** How to generate code for a rule. This includes any return type - * data aggregates required for multiple return values. - */ -rule(ruleName,ruleDescriptor,block,emptyRule,description,exceptions,finally,memoize) ::= << -/** - * $ANTLR start <ruleName> - * <fileName>:<description> - */ -<name>::<returnType()> -<name>::<ruleName>(<ruleDescriptor.parameterScope:parameterScope()>) -{ - <if(trace)>ANTLR_PRINTF("enter <ruleName> %s failed=%d, backtracking=%d\n", this->LT(1), this->get_backtracking() );<endif> - <ruleDeclarations()> - <ruleLabelDefs()> - <ruleInitializations()> - <ruleMemoization(rname=ruleName)> - <ruleLabelInitializations()> - - <if(actions.(actionScope).rulecatch)> - try { - <else> - <if(exceptions)> - try { - <endif> - <endif> - <@preamble()> - { - <block> - } - <ruleCleanUp()> - -<if(exceptions)> - <exceptions:{e|<catch(decl=e.decl,action=e.action)><\n>}> -<else> - <if(!emptyRule)> - if (this->hasException()) - { - retval = nullptr; - this->preporterror(); - this->precover(); - <@setErrorReturnValue()> - } - <if(actions.(actionScope).rulecatch)> - } <actions.(actionScope).rulecatch> - <endif> - <endif> -<endif> - - <if(trace)>ANTLR_PRINTF("exit <ruleName> %d failed=%s backtracking=%s\n", this->LT(1), failed, this->get_backtracking() );<endif> - <memoize()> -<if(finally)> - <finalCode(finalBlock=finally)> -<endif> - <scopeClean()> - <@postamble()> - return <ruleReturnValue()>; -} -/* $ANTLR end <ruleName> */ ->> - -finalCode(finalBlock) ::= << -{ - <finalBlock> -} - ->> - -catch(decl,action) ::= << -/* catch(decl,action) - */ -}catch (<e.decl>) { - <e.action> -} ->> - -ruleDeclarations() ::= << -<if(PARSER)> -RuleReturnType res = nullptr; -<if(!ruleDescriptor.isSynPred)> + Y_ASSERT(dynamic_cast\<@ANTLR_PACKAGE_NAME@::TRule_<r.ruleDescriptor.name>* >(root)); + ast->unsafe_arena_set_allocated_rule_<r.ruleDescriptor.name>(static_cast\<@ANTLR_PACKAGE_NAME@::TRule_<r.ruleDescriptor.name>* >(root)); + return ast; +\} +return nullptr; +}; 
separator=""> +} + +<displayRecognitionError(name)> + +<endif> + +/** Return token names used by this <grammarType()> + * + * The returned pointer is used as an index into the token names table (using the token + * number as the index). + * + * \return Pointer to first char * in the table. + */ +<if(LEXER)> +static ANTLR_UINT8** getTokenNames() +{ + return <grammar.composite.rootGrammar.recognizerName>TokenNames; +} +<endif> + +/* Declare the bitsets + */ +<bitsets:{it | <bitsetDeclare(bitsetname={FOLLOW_<it.name>_in_<it.inName><it.tokenIndex>}, + words64=it.bits, traits={<name>ImplTraits} )>}> + + +<if(cyclicDFAs)> + +/* ========================================================================= + * DFA tables for the parser + */ +<cyclicDFAs:cyclicDFA()> <! dump tables for all DFA !> +/* ========================================================================= + * End of DFA tables for the parser + */ +<endif> + +/* ============================================== + * Parsing rules + */ +<rules; separator="\n\n"> +<if(grammar.delegatedRules)> + // Delegated methods that appear to be a part of this + // parser + // +<grammar.delegatedRules:{ruleDescriptor| + <returnType()> <name>::<ruleDescriptor.name>(<ruleDescriptor.parameterScope:parameterScope()>) + { + <if(ruleDescriptor.hasReturnValue)>return <endif>m_<ruleDescriptor.grammar:delegateName()>-><ruleDescriptor.name>(<if(ruleDescriptor.parameterScope)><ruleDescriptor.parameterScope.attributes:{a|<a.name>}; separator=", "><endif>); + \}}; separator="\n"> + +<endif> +/* End of parsing rules + * ============================================== + */ + +/* ============================================== + * Syntactic predicates + */ +<synpreds:{p | <synpred(predname=p)>}> +/* End of syntactic predicates + * ============================================== + */ + +>> + +displayRecognitionError(name) ::=<< +void <name>::displayRecognitionError(ANTLR_UINT8** tokenNames, ExceptionBaseType* ex) { + Error = true; + if (!Errors) { + 
return; + } + if (ex) { + TStringStream err; + err \<\< ex->get_message(); + +<if(PARSER)> + NProtoAST::InvalidToken(err, ex->get_token()); +<else> + if (ex->get_c() && ex->get_c() != ImplTraits::CommonTokenType::TOKEN_EOF) { + NProtoAST::InvalidCharacter(err, ex->get_input()->get_super()); + } +<endif> + + ImplTraits::StringStreamType errtext; + ex->displayRecognitionError(tokenNames, errtext); + err \<\< errtext.str(); + Errors->Error(ex->get_line(), ex->get_charPositionInLine(), err.Str()); + } else { + Errors->Error(0, 0, "Unknown error"); + } +} +>> + +constructorInitializerType(rec_state) ::=<< +<if(PARSER)> + : ImplTraits::BaseParserType(ANTLR_SIZE_HINT, instream, <rec_state>) +<endif> +<if(TREE_PARSER)> + : ImplTraits::BaseTreeParserType(ANTLR_SIZE_HINT, instream, <rec_state>) +<endif> +>> + +parser( grammar, + name, + scopes, + tokens, + tokenNames, + rules, + numRules, + bitsets, + ASTLabelType, + superClass="Parser", + labelType="ImplTraits::CommonTokenType*", + members={<actions.parser.members>} + ) ::= << +<beginNamespace(actions)> +<genericParser(inputStreamType="CommonTokenStreamType*", rewriteElementType="Token", filterMode=false, ...)> +<endNamespace(actions)> +>> + +/** How to generate a tree parser; same as parser except the input + * stream is a different type. + */ +treeParser( grammar, + name, + scopes, + tokens, + tokenNames, + globalAction, + rules, + numRules, + bitsets, + filterMode, + labelType={<ASTLabelType>}, + ASTLabelType="ImplTraits::TreeType*", + superClass="TreeParser", + members={<actions.treeparser.members>} + ) ::= << +<beginNamespace(actions)> +<genericParser(inputStreamType="CommonTreeNodeStream*", rewriteElementType="Node", ...)> +<endNamespace(actions)> +>> + +/** A simpler version of a rule template that is specific to the imaginary + * rules created for syntactic predicates. As they never have return values + * nor parameters etc..., just give simplest possible method. 
Don't do + * any of the normal memoization stuff in here either; it's a waste. + * As predicates cannot be inlined into the invoking rule, they need to + * be in a rule by themselves. + */ +synpredRule(ruleName, ruleDescriptor, block, description, nakedBlock) ::= +<< +// $ANTLR start <ruleName> +void <name>::m<ruleName>_fragment( <ruleDescriptor.parameterScope:parameterScope()> ) +{ + <ruleDeclarations()> + <ruleLabelDefs()> + <ruleLabelInitializations()> +<if(trace)> + ANTLR_PRINTF("enter <ruleName> %d failed = %d, backtracking = %d\\n", this->LT(1),failed,this->get_backtracking() ); + <block> + ANTLR_PRINTF("exit <ruleName> %d, failed = %d, backtracking = %d\\n", this->LT(1),failed,this->get_backtracking()); + +<else> + <block> +<endif> + +goto rule<ruleDescriptor.name>Ex; /* Prevent compiler warnings */ +rule<ruleDescriptor.name>Ex: ; +} +// $ANTLR end <ruleName> +>> + +synpred(predname) ::= << + +bool <name>::msynpred( antlr3::ClassForwarder\< <predname> > ) +{ + ANTLR_MARKER start; + bool success; + + this->inc_backtracking(); + <@start()> + start = this->mark(); + this->m<predname>_fragment(); // can never throw exception + success = !( this->get_failedflag() ); + this->rewind(start); + <@stop()> + this->dec_backtracking(); + this->set_failedflag(false); + return success; +}<\n> +>> + +lexerSynpred(predname) ::= << +<synpred(predname)> +>> + +ruleMemoization(rname) ::= << +<if(memoize)> +if ( (this->get_backtracking()>0) && (this->haveParsedRule(<ruleDescriptor.index>)) ) +{ + <finalCode(finalBlock=finally)> +<if(!ruleDescriptor.isSynPred)> + <scopeClean()><\n> +<endif> + return <ruleReturnValue()>; +} +<endif> +>> + +/** How to test for failure and return from rule */ +checkRuleBacktrackFailure() ::= << +if (this->hasException()) +{ + goto rule<ruleDescriptor.name>Ex; +} +<if(backtracking)> +if (this->hasFailed()) +{ + <scopeClean()> + <@debugClean()> + return <ruleReturnValue()>; +} +<endif> +>> + +/** This rule has failed, exit indicating failure during 
backtrack */ +ruleBacktrackFailure() ::= << +<if(backtracking)> +if (this->get_backtracking()>0) +{ + this->set_failedflag( true ); + <scopeClean()> + return <ruleReturnValue()>; +} +<endif> +>> + +/** How to generate code for a rule. This includes any return type + * data aggregates required for multiple return values. + */ +rule(ruleName,ruleDescriptor,block,emptyRule,description,exceptions,finally,memoize) ::= << +/** + * $ANTLR start <ruleName> + * <fileName>:<description> + */ +<name>::<returnType()> +<name>::<ruleName>(<ruleDescriptor.parameterScope:parameterScope()>) +{ + <if(trace)>ANTLR_PRINTF("enter <ruleName> %s failed=%d, backtracking=%d\n", this->LT(1), this->get_backtracking() );<endif> + <ruleDeclarations()> + <ruleLabelDefs()> + <ruleInitializations()> + <ruleMemoization(rname=ruleName)> + <ruleLabelInitializations()> + + <if(actions.(actionScope).rulecatch)> + try { + <else> + <if(exceptions)> + try { + <endif> + <endif> + <@preamble()> + { + <block> + } + <ruleCleanUp()> + +<if(exceptions)> + <exceptions:{e|<catch(decl=e.decl,action=e.action)><\n>}> +<else> + <if(!emptyRule)> + if (this->hasException()) + { + retval = nullptr; + this->preporterror(); + this->precover(); + <@setErrorReturnValue()> + } + <if(actions.(actionScope).rulecatch)> + } <actions.(actionScope).rulecatch> + <endif> + <endif> +<endif> + + <if(trace)>ANTLR_PRINTF("exit <ruleName> %d failed=%s backtracking=%s\n", this->LT(1), failed, this->get_backtracking() );<endif> + <memoize()> +<if(finally)> + <finalCode(finalBlock=finally)> +<endif> + <scopeClean()> + <@postamble()> + return <ruleReturnValue()>; +} +/* $ANTLR end <ruleName> */ +>> + +finalCode(finalBlock) ::= << +{ + <finalBlock> +} + +>> + +catch(decl,action) ::= << +/* catch(decl,action) + */ +}catch (<e.decl>) { + <e.action> +} +>> + +ruleDeclarations() ::= << +<if(PARSER)> +RuleReturnType res = nullptr; +<if(!ruleDescriptor.isSynPred)> @ANTLR_PACKAGE_NAME@::TRule_<ruleName>* retval = 
google::protobuf::Arena::CreateMessage\<@ANTLR_PACKAGE_NAME@::TRule_<ruleName>\>(Arena); -<endif> -<endif> -<if(memoize)> -ANTLR_MARKER <ruleDescriptor.name>_StartIndex; -<endif> ->> - -ruleInitializations() ::= << -/* Initialize rule variables - */ -<if(memoize)> -<ruleDescriptor.name>_StartIndex = this->index();<\n> -<endif> -<ruleDescriptor.useScopes:{it | m_<it>_stack.push(<it>Scope()); }; separator="\n"> -<ruleDescriptor.ruleScope:{it | m_<it.name>_stack.push(<it.name>Scope()); }; separator="\n"> ->> - -ruleLabelDefs() ::= << -<[ruleDescriptor.tokenLabels,ruleDescriptor.tokenListLabels, - ruleDescriptor.wildcardTreeLabels,ruleDescriptor.wildcardTreeListLabels] - :{it |<labelType> <it.label.text> = NULL;}; separator="\n" -> -<[ruleDescriptor.tokenListLabels,ruleDescriptor.ruleListLabels,ruleDescriptor.wildcardTreeListLabels] - :{it |ImplTraits::TokenPtrsListType list_<it.label.text>;}; separator="\n" -> -<ruleDescriptor.ruleLabels:ruleLabelDef(); separator="\n"> ->> - -ruleLabelInitializations() ::= << ->> - -lexerRuleLabelDefs() ::= << -<[ruleDescriptor.tokenLabels, - ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleLabels] - :{it |<labelType> <it.label.text> = NULL;}; separator="\n" -> -<ruleDescriptor.charLabels:{it |ANTLR_UINT32 <it.label.text>;}; separator="\n"> -<[ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleListLabels, - ruleDescriptor.ruleListLabels] - :{it | ImplTraits::IntTrieType<CommonTokenType>* list_<it.label.text>;}; separator="\n" -> ->> - -lexerRuleLabelInit() ::= << -<[ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleListLabels, - ruleDescriptor.ruleListLabels] - :{it |list_<it.label.text> = new ImplTraits::IntTrieType<CommonTokenType>(31);}; separator="\n" -> ->> - -lexerRuleLabelFree() ::= << -<[ruleDescriptor.tokenLabels, - ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleLabels] - :{it |<it.label.text> = NULL;}; separator="\n" -> -<[ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleListLabels, - 
ruleDescriptor.ruleListLabels] - :{it | delete list_<it.label.text>;}; separator="\n" -> ->> - -ruleReturnValue() ::= <% -<if(!ruleDescriptor.isSynPred)> -retval -<endif> -%> - -memoize() ::= << -<if(memoize)> -<if(backtracking)> -if ( this->get_backtracking() > 0 ) { this->memoize(<ruleDescriptor.index>, <ruleDescriptor.name>_StartIndex); } -<endif> -<endif> ->> - -ruleCleanUp() ::= << - -// This is where rules clean up and exit -// -goto rule<ruleDescriptor.name>Ex; /* Prevent compiler warnings */ -rule<ruleDescriptor.name>Ex: ; ->> - -scopeClean() ::= << -<ruleDescriptor.useScopes:{it | m_<it>_stack.pop(); }; separator="\n"> -<ruleDescriptor.ruleScope:{it | m_<it.name>_stack.pop(); }; separator="\n"> - ->> -/** How to generate a rule in the lexer; naked blocks are used for - * fragment rules, which do not produce tokens. - */ -lexerRule(ruleName,nakedBlock,ruleDescriptor,block,memoize) ::= << -// Comes from: <block.description> -/** \brief Lexer rule generated by ANTLR3 - * - * $ANTLR start <ruleName> - * - * Looks to match the characters the constitute the token <ruleName> - * from the attached input stream. - * - * - * \remark - * - lexer->error == true if an exception was thrown. 
- */ -void <name>::m<ruleName>(<ruleDescriptor.parameterScope:parameterScope()>) -{ - ANTLR_UINT32 _type; - <ruleDeclarations()> - <lexerRuleLabelDefs()> - <if(trace)> - Cout \<\< "enter <ruleName> '" \<\< (char)this->LA(1) - \<\< "' line=" \<\< this->getLine() \<\< ":" \<\< this->getCharPositionInLine() - \<\< " failed=" \<\< this->get_failedflag() \<\< " backtracking=" \<\< this->get_backtracking() \<\< Endl; - <endif> - -<if(nakedBlock)> - <ruleMemoization(rname=ruleName)> - <lexerRuleLabelInit()> - - <block><\n> -<else> - <ruleMemoization(rname=ruleName)> - <lexerRuleLabelInit()> - _type = TOKEN_<ruleName>; - - <block> - // TODO: FIX +<endif> +<endif> +<if(memoize)> +ANTLR_MARKER <ruleDescriptor.name>_StartIndex; +<endif> +>> + +ruleInitializations() ::= << +/* Initialize rule variables + */ +<if(memoize)> +<ruleDescriptor.name>_StartIndex = this->index();<\n> +<endif> +<ruleDescriptor.useScopes:{it | m_<it>_stack.push(<it>Scope()); }; separator="\n"> +<ruleDescriptor.ruleScope:{it | m_<it.name>_stack.push(<it.name>Scope()); }; separator="\n"> +>> + +ruleLabelDefs() ::= << +<[ruleDescriptor.tokenLabels,ruleDescriptor.tokenListLabels, + ruleDescriptor.wildcardTreeLabels,ruleDescriptor.wildcardTreeListLabels] + :{it |<labelType> <it.label.text> = NULL;}; separator="\n" +> +<[ruleDescriptor.tokenListLabels,ruleDescriptor.ruleListLabels,ruleDescriptor.wildcardTreeListLabels] + :{it |ImplTraits::TokenPtrsListType list_<it.label.text>;}; separator="\n" +> +<ruleDescriptor.ruleLabels:ruleLabelDef(); separator="\n"> +>> + +ruleLabelInitializations() ::= << +>> + +lexerRuleLabelDefs() ::= << +<[ruleDescriptor.tokenLabels, + ruleDescriptor.tokenListLabels, + ruleDescriptor.ruleLabels] + :{it |<labelType> <it.label.text> = NULL;}; separator="\n" +> +<ruleDescriptor.charLabels:{it |ANTLR_UINT32 <it.label.text>;}; separator="\n"> +<[ruleDescriptor.tokenListLabels, + ruleDescriptor.ruleListLabels, + ruleDescriptor.ruleListLabels] + :{it | 
ImplTraits::IntTrieType<CommonTokenType>* list_<it.label.text>;}; separator="\n" +> +>> + +lexerRuleLabelInit() ::= << +<[ruleDescriptor.tokenListLabels, + ruleDescriptor.ruleListLabels, + ruleDescriptor.ruleListLabels] + :{it |list_<it.label.text> = new ImplTraits::IntTrieType<CommonTokenType>(31);}; separator="\n" +> +>> + +lexerRuleLabelFree() ::= << +<[ruleDescriptor.tokenLabels, + ruleDescriptor.tokenListLabels, + ruleDescriptor.ruleLabels] + :{it |<it.label.text> = NULL;}; separator="\n" +> +<[ruleDescriptor.tokenListLabels, + ruleDescriptor.ruleListLabels, + ruleDescriptor.ruleListLabels] + :{it | delete list_<it.label.text>;}; separator="\n" +> +>> + +ruleReturnValue() ::= <% +<if(!ruleDescriptor.isSynPred)> +retval +<endif> +%> + +memoize() ::= << +<if(memoize)> +<if(backtracking)> +if ( this->get_backtracking() > 0 ) { this->memoize(<ruleDescriptor.index>, <ruleDescriptor.name>_StartIndex); } +<endif> +<endif> +>> + +ruleCleanUp() ::= << + +// This is where rules clean up and exit +// +goto rule<ruleDescriptor.name>Ex; /* Prevent compiler warnings */ +rule<ruleDescriptor.name>Ex: ; +>> + +scopeClean() ::= << +<ruleDescriptor.useScopes:{it | m_<it>_stack.pop(); }; separator="\n"> +<ruleDescriptor.ruleScope:{it | m_<it.name>_stack.pop(); }; separator="\n"> + +>> +/** How to generate a rule in the lexer; naked blocks are used for + * fragment rules, which do not produce tokens. + */ +lexerRule(ruleName,nakedBlock,ruleDescriptor,block,memoize) ::= << +// Comes from: <block.description> +/** \brief Lexer rule generated by ANTLR3 + * + * $ANTLR start <ruleName> + * + * Looks to match the characters the constitute the token <ruleName> + * from the attached input stream. + * + * + * \remark + * - lexer->error == true if an exception was thrown. 
+ */ +void <name>::m<ruleName>(<ruleDescriptor.parameterScope:parameterScope()>) +{ + ANTLR_UINT32 _type; + <ruleDeclarations()> + <lexerRuleLabelDefs()> + <if(trace)> + Cout \<\< "enter <ruleName> '" \<\< (char)this->LA(1) + \<\< "' line=" \<\< this->getLine() \<\< ":" \<\< this->getCharPositionInLine() + \<\< " failed=" \<\< this->get_failedflag() \<\< " backtracking=" \<\< this->get_backtracking() \<\< Endl; + <endif> + +<if(nakedBlock)> + <ruleMemoization(rname=ruleName)> + <lexerRuleLabelInit()> + + <block><\n> +<else> + <ruleMemoization(rname=ruleName)> + <lexerRuleLabelInit()> + _type = TOKEN_<ruleName>; + + <block> + // TODO: FIX if (IsHiddenToken(_type)) { - this->get_lexstate()->set_channel(HIDDEN); - } - this->get_lexstate()->set_type(_type); -<endif> - <if(trace)> - Cout \<\< "exit <ruleName> '" \<\< (char)this->LA(1) - \<\< "' line=" \<\< this->getLine() \<\< ":" \<\< this->getCharPositionInLine() - \<\< " failed=" \<\< this->get_failedflag() \<\< " backtracking=" \<\< this->get_backtracking() \<\< Endl; - <endif> - <ruleCleanUp()> - <lexerRuleLabelFree()> - <memoize> -} -// $ANTLR end <ruleName> ->> - -/** How to generate code for the implicitly-defined lexer grammar rule - * that chooses between lexer rules. - */ -tokensRule(ruleName,nakedBlock,args,block,ruleDescriptor) ::= << -/** This is the entry point in to the lexer from an object that - * wants to generate the next token, such as a pCOMMON_TOKEN_STREAM - */ -void -<name>::mTokens() -{ - <block><\n> - - goto ruleTokensEx; /* Prevent compiler warnings */ -ruleTokensEx: ; -} ->> - -// S U B R U L E S - -/** A (...) 
subrule with multiple alternatives */ -block(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << - -// <fileName>:<description> -{ - int alt<decisionNumber>=<maxAlt>; - <decls> - <@predecision()> - <decision> - <@postdecision()> - <@prebranch()> - switch (alt<decisionNumber>) - { - <alts:{a | <blockAltSwitchCase(i,a,rest(alts))>}> - } - <@postbranch()> -} ->> - -/** A rule block with multiple alternatives */ -ruleBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -{ - // <fileName>:<description> - - ANTLR_UINT32 alt<decisionNumber>; - alt<decisionNumber>=<maxAlt>; - <decls> - <@predecision()> - <decision> - <@postdecision()> - switch (alt<decisionNumber>) - { - <alts:{a | <ruleAltSwitchCase(i,a,rest(alts))>}> - } -} ->> - -ruleBlockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << -// <fileName>:<description> -<if(PARSER)> -<if(!ruleDescriptor.isSynPred)> -auto parent = retval; -auto parentBlock0 = parent; -<endif> -<endif> -<decls> -<@prealt()> -<alts> -<if(PARSER)> -<if(!ruleDescriptor.isSynPred)> -if (!parent) { - retval = nullptr; -} -<endif> -<endif> -<@postalt()> ->> - -/** A special case of a (...) 
subrule with a single alternative */ -blockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << -// <fileName>:<description> -<if(PARSER)> -auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->MutableBlock<elemId>(); -auto parent = parentBlock<blockLevel>; -<endif> -<decls> -<@prealt()> -<alts> -<if(PARSER)> -if (!parent) { - parentBlock<enclosingBlockLevel>->ClearBlock<elemId>(); -} -<endif> -<@postalt()> ->> - -/** A (..)+ block with 1 or more alternatives */ -positiveClosureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -// <fileName>:<description> -{ - int cnt<decisionNumber>=0; - <decls> - <@preloop()> - - for (;;) { - int alt<decisionNumber>=<maxAlt>; - <@predecision()> - <decision> - <@postdecision()> - switch (alt<decisionNumber>) { - <alts:{a | <closureBlockAltSwitchCase(i,a,rest(alts))>}> - default: - if ( cnt<decisionNumber> >= 1 ) { - goto loop<decisionNumber>; - } - <ruleBacktrackFailure()> - <earlyExitEx()> - <@earlyExitException()> - goto rule<ruleDescriptor.name>Ex; - } - cnt<decisionNumber>++; - } - loop<decisionNumber>: ; /* Jump to here if this rule does not match */ - <if(PARSER)> + this->get_lexstate()->set_channel(HIDDEN); + } + this->get_lexstate()->set_type(_type); +<endif> + <if(trace)> + Cout \<\< "exit <ruleName> '" \<\< (char)this->LA(1) + \<\< "' line=" \<\< this->getLine() \<\< ":" \<\< this->getCharPositionInLine() + \<\< " failed=" \<\< this->get_failedflag() \<\< " backtracking=" \<\< this->get_backtracking() \<\< Endl; + <endif> + <ruleCleanUp()> + <lexerRuleLabelFree()> + <memoize> +} +// $ANTLR end <ruleName> +>> + +/** How to generate code for the implicitly-defined lexer grammar rule + * that chooses between lexer rules. 
+ */ +tokensRule(ruleName,nakedBlock,args,block,ruleDescriptor) ::= << +/** This is the entry point in to the lexer from an object that + * wants to generate the next token, such as a pCOMMON_TOKEN_STREAM + */ +void +<name>::mTokens() +{ + <block><\n> + + goto ruleTokensEx; /* Prevent compiler warnings */ +ruleTokensEx: ; +} +>> + +// S U B R U L E S + +/** A (...) subrule with multiple alternatives */ +block(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << + +// <fileName>:<description> +{ + int alt<decisionNumber>=<maxAlt>; + <decls> + <@predecision()> + <decision> + <@postdecision()> + <@prebranch()> + switch (alt<decisionNumber>) + { + <alts:{a | <blockAltSwitchCase(i,a,rest(alts))>}> + } + <@postbranch()> +} +>> + +/** A rule block with multiple alternatives */ +ruleBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +{ + // <fileName>:<description> + + ANTLR_UINT32 alt<decisionNumber>; + alt<decisionNumber>=<maxAlt>; + <decls> + <@predecision()> + <decision> + <@postdecision()> + switch (alt<decisionNumber>) + { + <alts:{a | <ruleAltSwitchCase(i,a,rest(alts))>}> + } +} +>> + +ruleBlockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << +// <fileName>:<description> +<if(PARSER)> +<if(!ruleDescriptor.isSynPred)> +auto parent = retval; +auto parentBlock0 = parent; +<endif> +<endif> +<decls> +<@prealt()> +<alts> +<if(PARSER)> +<if(!ruleDescriptor.isSynPred)> +if (!parent) { + retval = nullptr; +} +<endif> +<endif> +<@postalt()> +>> + +/** A special case of a (...) 
subrule with a single alternative */ +blockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << +// <fileName>:<description> +<if(PARSER)> +auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->MutableBlock<elemId>(); +auto parent = parentBlock<blockLevel>; +<endif> +<decls> +<@prealt()> +<alts> +<if(PARSER)> +if (!parent) { + parentBlock<enclosingBlockLevel>->ClearBlock<elemId>(); +} +<endif> +<@postalt()> +>> + +/** A (..)+ block with 1 or more alternatives */ +positiveClosureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +// <fileName>:<description> +{ + int cnt<decisionNumber>=0; + <decls> + <@preloop()> + + for (;;) { + int alt<decisionNumber>=<maxAlt>; + <@predecision()> + <decision> + <@postdecision()> + switch (alt<decisionNumber>) { + <alts:{a | <closureBlockAltSwitchCase(i,a,rest(alts))>}> + default: + if ( cnt<decisionNumber> >= 1 ) { + goto loop<decisionNumber>; + } + <ruleBacktrackFailure()> + <earlyExitEx()> + <@earlyExitException()> + goto rule<ruleDescriptor.name>Ex; + } + cnt<decisionNumber>++; + } + loop<decisionNumber>: ; /* Jump to here if this rule does not match */ + <if(PARSER)> Y_VERIFY(parentBlock<enclosingBlockLevel>, "parentBlock is null"); - if (!parentBlock<enclosingBlockLevel>->Block<elemId>Size()) { - <ruleBacktrackFailure()> - <earlyExitEx()> - goto rule<ruleDescriptor.name>Ex; - } - <endif> - <@postloop()> -} ->> - -earlyExitEx() ::= << -/* mismatchedSetEx() - */ -new ANTLR_Exception\< <name>ImplTraits, EARLY_EXIT_EXCEPTION, StreamType>( this->get_rec(), "" ); -<\n> ->> -positiveClosureBlockSingleAlt ::= positiveClosureBlock - -/** A (..)* block with 1 or more alternatives */ -closureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << - -// <fileName>:<description> -<decls> - -<@preloop()> -for (;;) { - int alt<decisionNumber>=<maxAlt>; - <@predecision()> - <decision> - 
<@postdecision()> - switch (alt<decisionNumber>) { - <alts:{a | <closureBlockAltSwitchCase(i,a,rest(alts))>}> - default: - goto loop<decisionNumber>; /* break out of the loop */ - break; - } -} -loop<decisionNumber>: ; /* Jump out to here if this rule does not match */ -<@postloop()> ->> - -closureBlockSingleAlt ::= closureBlock - -/** Optional blocks (x)? are translated to (x|) by antlr before code generation - * so we can just use the normal block template - */ -optionalBlock ::= block - -optionalBlockSingleAlt ::= block - -/** A case in a switch that jumps to an alternative given the alternative - * number. A DFA predicts the alternative and then a simple switch - * does the jump to the code that actually matches that alternative. - */ -ruleAltSwitchCase(altNum,alt,multiple) ::= << -case <altNum>: { - <if(PARSER)> - auto parent = retval->MutableAlt_<ruleName><altNum>(); - auto parentBlock0 = parent; - <endif> - <@prealt()> - <alt> - <if(PARSER)> - if (!parent || !retval->Alt_case()) { - retval = nullptr; - } - <endif> - break;<\n> -} ->> - -blockAltSwitchCase(altNum,alt,multiple) ::= << -case <altNum>: { - <if(PARSER)> - auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->MutableBlock<elemId>()<if(multiple)>->MutableAlt<altNum>()<endif>; - auto parent = parentBlock<blockLevel>; - <endif> - <@prealt()> - <alt> - <if(PARSER)> - if (!parent) { - parentBlock<enclosingBlockLevel>->ClearBlock<elemId>(); - } - <endif> - break;<\n> -} ->> - -closureBlockAltSwitchCase(altNum,alt,multiple) ::= << -case <altNum>: { - <if(PARSER)> - auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->AddBlock<elemId>()<if(multiple)>->MutableAlt<altNum>()<endif>; - auto parent = parentBlock<blockLevel>; - <endif> - <@prealt()> - <alt> - <if(PARSER)> - if (!parent) { - parentBlock<enclosingBlockLevel>->MutableBlock<elemId>()->RemoveLast(); - } - <endif> - break;<\n> -} ->> - -/** An alternative is just a list of elements; at outermost level */ 
-alt(elements,altNum,description,autoAST,outerAlt,treeLevel,rew) ::= << -// <fileName>:<description> -{ - <@declarations()> - <@initializations()> - <if(PARSER)> - <if(!ruleDescriptor.isSynPred)> + if (!parentBlock<enclosingBlockLevel>->Block<elemId>Size()) { + <ruleBacktrackFailure()> + <earlyExitEx()> + goto rule<ruleDescriptor.name>Ex; + } + <endif> + <@postloop()> +} +>> + +earlyExitEx() ::= << +/* mismatchedSetEx() + */ +new ANTLR_Exception\< <name>ImplTraits, EARLY_EXIT_EXCEPTION, StreamType>( this->get_rec(), "" ); +<\n> +>> +positiveClosureBlockSingleAlt ::= positiveClosureBlock + +/** A (..)* block with 1 or more alternatives */ +closureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << + +// <fileName>:<description> +<decls> + +<@preloop()> +for (;;) { + int alt<decisionNumber>=<maxAlt>; + <@predecision()> + <decision> + <@postdecision()> + switch (alt<decisionNumber>) { + <alts:{a | <closureBlockAltSwitchCase(i,a,rest(alts))>}> + default: + goto loop<decisionNumber>; /* break out of the loop */ + break; + } +} +loop<decisionNumber>: ; /* Jump out to here if this rule does not match */ +<@postloop()> +>> + +closureBlockSingleAlt ::= closureBlock + +/** Optional blocks (x)? are translated to (x|) by antlr before code generation + * so we can just use the normal block template + */ +optionalBlock ::= block + +optionalBlockSingleAlt ::= block + +/** A case in a switch that jumps to an alternative given the alternative + * number. A DFA predicts the alternative and then a simple switch + * does the jump to the code that actually matches that alternative. 
+ */ +ruleAltSwitchCase(altNum,alt,multiple) ::= << +case <altNum>: { + <if(PARSER)> + auto parent = retval->MutableAlt_<ruleName><altNum>(); + auto parentBlock0 = parent; + <endif> + <@prealt()> + <alt> + <if(PARSER)> + if (!parent || !retval->Alt_case()) { + retval = nullptr; + } + <endif> + break;<\n> +} +>> + +blockAltSwitchCase(altNum,alt,multiple) ::= << +case <altNum>: { + <if(PARSER)> + auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->MutableBlock<elemId>()<if(multiple)>->MutableAlt<altNum>()<endif>; + auto parent = parentBlock<blockLevel>; + <endif> + <@prealt()> + <alt> + <if(PARSER)> + if (!parent) { + parentBlock<enclosingBlockLevel>->ClearBlock<elemId>(); + } + <endif> + break;<\n> +} +>> + +closureBlockAltSwitchCase(altNum,alt,multiple) ::= << +case <altNum>: { + <if(PARSER)> + auto parentBlock<blockLevel> = parentBlock<enclosingBlockLevel>->AddBlock<elemId>()<if(multiple)>->MutableAlt<altNum>()<endif>; + auto parent = parentBlock<blockLevel>; + <endif> + <@prealt()> + <alt> + <if(PARSER)> + if (!parent) { + parentBlock<enclosingBlockLevel>->MutableBlock<elemId>()->RemoveLast(); + } + <endif> + break;<\n> +} +>> + +/** An alternative is just a list of elements; at outermost level */ +alt(elements,altNum,description,autoAST,outerAlt,treeLevel,rew) ::= << +// <fileName>:<description> +{ + <@declarations()> + <@initializations()> + <if(PARSER)> + <if(!ruleDescriptor.isSynPred)> Y_VERIFY(parent, "Parent is null"); - parent->SetDescr("<description>"); - <endif> - <endif> - <elements:{e | <element(e, i)>}; separator="\n"> - <rew> - <@cleanup()> -} ->> - -// E L E M E N T S -/** What to emit when there is no rewrite. For auto build - * mode, does nothing. 
- */ -noRewrite(rewriteBlockLevel, treeLevel) ::= "" - -/** Dump the elements one per line */ -element(e,elemId) ::= << -<@prematch()> -<e.el><\n> ->> - -/** match a token optionally with a label in front */ -tokenRef(token,label,elementIndex,terminalOptions) ::= << -{ - auto token = matchToken(TOKEN_<token>, &FOLLOW_<token>_in_<ruleName><elementIndex>); - if (token) { - <if(!ruleDescriptor.isSynPred)> + parent->SetDescr("<description>"); + <endif> + <endif> + <elements:{e | <element(e, i)>}; separator="\n"> + <rew> + <@cleanup()> +} +>> + +// E L E M E N T S +/** What to emit when there is no rewrite. For auto build + * mode, does nothing. + */ +noRewrite(rewriteBlockLevel, treeLevel) ::= "" + +/** Dump the elements one per line */ +element(e,elemId) ::= << +<@prematch()> +<e.el><\n> +>> + +/** match a token optionally with a label in front */ +tokenRef(token,label,elementIndex,terminalOptions) ::= << +{ + auto token = matchToken(TOKEN_<token>, &FOLLOW_<token>_in_<ruleName><elementIndex>); + if (token) { + <if(!ruleDescriptor.isSynPred)> Y_VERIFY(parent, "Parent is null"); - parent->MutableToken<elemId>()->SetId(token->get_type()); - parent->MutableToken<elemId>()->SetLine(token->get_line()); - parent->MutableToken<elemId>()->SetColumn(token->get_charPositionInLine()); - parent->MutableToken<elemId>()->SetElement(<elementIndex>); - parent->MutableToken<elemId>()->SetValue(token->getText().c_str()); - <endif> - } -} -<checkRuleBacktrackFailure()> ->> - -/** ids+=ID */ -tokenRefAndListLabel(token,label,elementIndex,terminalOptions) ::= << -<tokenRef(...)> -<listLabel(elem=label,...)> ->> - -listLabel(label,elem) ::= << - list_<label>.push_back(<elem>); ->> - - -/** match a character */ -charRef(char,label) ::= << -<if(label)> -<label> = this->LA(1);<\n> -<endif> - this->matchc(<char>); -<checkRuleBacktrackFailure()> ->> - -/** match a character range */ -charRangeRef(a,b,label) ::= << -<if(label)> -<label> = this->LA(1);<\n> -<endif> -this->matchRange(<a>, <b>); 
-<checkRuleBacktrackFailure()> ->> - -/** For now, sets are interval tests and must be tested inline */ -matchSet(s,label,elementIndex,terminalOptions,postmatchCode="") ::= << -{ - <if(label)> - <if(LEXER)> - <label>= this->LA(1);<\n> - <endif> - <endif> - <if(PARSER)> - auto token = this->LT(1); - <endif> - if ( <s> ) - { - this->consume(); - <if(PARSER)> - if (token) { - <if(!ruleDescriptor.isSynPred)> + parent->MutableToken<elemId>()->SetId(token->get_type()); + parent->MutableToken<elemId>()->SetLine(token->get_line()); + parent->MutableToken<elemId>()->SetColumn(token->get_charPositionInLine()); + parent->MutableToken<elemId>()->SetElement(<elementIndex>); + parent->MutableToken<elemId>()->SetValue(token->getText().c_str()); + <endif> + } +} +<checkRuleBacktrackFailure()> +>> + +/** ids+=ID */ +tokenRefAndListLabel(token,label,elementIndex,terminalOptions) ::= << +<tokenRef(...)> +<listLabel(elem=label,...)> +>> + +listLabel(label,elem) ::= << + list_<label>.push_back(<elem>); +>> + + +/** match a character */ +charRef(char,label) ::= << +<if(label)> +<label> = this->LA(1);<\n> +<endif> + this->matchc(<char>); +<checkRuleBacktrackFailure()> +>> + +/** match a character range */ +charRangeRef(a,b,label) ::= << +<if(label)> +<label> = this->LA(1);<\n> +<endif> +this->matchRange(<a>, <b>); +<checkRuleBacktrackFailure()> +>> + +/** For now, sets are interval tests and must be tested inline */ +matchSet(s,label,elementIndex,terminalOptions,postmatchCode="") ::= << +{ + <if(label)> + <if(LEXER)> + <label>= this->LA(1);<\n> + <endif> + <endif> + <if(PARSER)> + auto token = this->LT(1); + <endif> + if ( <s> ) + { + this->consume(); + <if(PARSER)> + if (token) { + <if(!ruleDescriptor.isSynPred)> Y_VERIFY(parent, "Parent is null"); - parent->MutableToken<elemId>()->SetId(token->get_type()); - parent->MutableToken<elemId>()->SetLine(token->get_line()); - parent->MutableToken<elemId>()->SetColumn(token->get_charPositionInLine()); - 
parent->MutableToken<elemId>()->SetElement(<elementIndex>); - parent->MutableToken<elemId>()->SetValue(token->getText().c_str()); - <endif> - } - <endif> - <postmatchCode> - <if(!LEXER)> - this->set_perror_recovery(false); - <endif> - <if(backtracking)> this->set_failedflag(false); <\n><endif> - } - else - { - <ruleBacktrackFailure()> - <mismatchedSetEx()> - <@mismatchedSetException()> - <if(LEXER)> - this->recover(); - <else> - <! use following code to make it recover inline; - this->recoverFromMismatchedSet(&FOLLOW_set_in_<ruleName><elementIndex>); - !> - <endif> - goto rule<ruleDescriptor.name>Ex; - }<\n> -} ->> - -mismatchedSetEx() ::= << -new ANTLR_Exception\< <name>ImplTraits, MISMATCHED_SET_EXCEPTION, StreamType>( this->get_rec(), "" ); -<if(PARSER)> -this->get_exception()->set_expectingSet(NULL); -<! use following code to make it recover inline; -this->get_exception()->set_expectingSet( &FOLLOW_set_in_<ruleName><elementIndex> ); -!> -<endif> ->> - -matchRuleBlockSet ::= matchSet - -matchSetAndListLabel(s,label,elementIndex,postmatchCode) ::= << -<matchSet(...)> -<listLabel(elem=label,...)> ->> - -/** Match a string literal */ -lexerStringRef(string,label,elementIndex) ::= << -<if(label)> -ANTLR_MARKER <label>Start = this->getCharIndex(); -ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); -ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); -this->matchs(<string>); -<checkRuleBacktrackFailure()> -<label> = new CommonTokenType; -<label>->set_type( CommonTokenType::TOKEN_INVALID ); -<label>->set_startIndex( <label>Start); -<label>->set_stopIndex( this->getCharIndex()-1); -<label>->set_input( this->get_input() ); -<label>->set_line( <label>StartLine<elementIndex> ); -<label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); -<else> -this->matchs(<string>); -<checkRuleBacktrackFailure()><\n> -<endif> ->> - -wildcard(token,label,elementIndex,terminalOptions) ::= << -<if(label)> 
-<label>=(<labelType>)this->LT(1);<\n> -<endif> -this->matchAnyToken(); -<checkRuleBacktrackFailure()> ->> - -wildcardAndListLabel(token,label,elementIndex,terminalOptions) ::= << -<wildcard(...)> -<listLabel(elem=label,...)> ->> - -/** Match . wildcard in lexer */ -wildcardChar(label, elementIndex) ::= << -<if(label)> -<label> = this->LA(1);<\n> -<endif> -this->matchAny(); -<checkRuleBacktrackFailure()> ->> - -wildcardCharListLabel(label, elementIndex) ::= << -<wildcardChar(...)> -<listLabel(elem=label,...)> ->> - -/** Match a rule reference by invoking it possibly with arguments - * and a return value or values. The 'rule' argument was the - * target rule name, but now is type Rule, whose toString is - * same: the rule name. Now though you can access full rule - * descriptor stuff. - */ -ruleRef(rule,label,elementIndex,args,scope) ::= << -this->followPush(FOLLOW_<rule.name>_in_<ruleName><elementIndex>); -res = <rule.name>(<if(args)><args; separator=", "><endif>); -if (res) { - <ruleAssert()> - <ruleResult()> -} -this->followPop(); -<checkRuleBacktrackFailure()> ->> - -ruleAssert() ::= << -<if(!ruleDescriptor.isSynPred)> -Y_ASSERT(dynamic_cast\<@ANTLR_PACKAGE_NAME@::TRule_<rule.name>* >(res)); -<endif> ->> - -ruleResult() ::= << -<if(!ruleDescriptor.isSynPred)> + parent->MutableToken<elemId>()->SetId(token->get_type()); + parent->MutableToken<elemId>()->SetLine(token->get_line()); + parent->MutableToken<elemId>()->SetColumn(token->get_charPositionInLine()); + parent->MutableToken<elemId>()->SetElement(<elementIndex>); + parent->MutableToken<elemId>()->SetValue(token->getText().c_str()); + <endif> + } + <endif> + <postmatchCode> + <if(!LEXER)> + this->set_perror_recovery(false); + <endif> + <if(backtracking)> this->set_failedflag(false); <\n><endif> + } + else + { + <ruleBacktrackFailure()> + <mismatchedSetEx()> + <@mismatchedSetException()> + <if(LEXER)> + this->recover(); + <else> + <! 
use following code to make it recover inline; + this->recoverFromMismatchedSet(&FOLLOW_set_in_<ruleName><elementIndex>); + !> + <endif> + goto rule<ruleDescriptor.name>Ex; + }<\n> +} +>> + +mismatchedSetEx() ::= << +new ANTLR_Exception\< <name>ImplTraits, MISMATCHED_SET_EXCEPTION, StreamType>( this->get_rec(), "" ); +<if(PARSER)> +this->get_exception()->set_expectingSet(NULL); +<! use following code to make it recover inline; +this->get_exception()->set_expectingSet( &FOLLOW_set_in_<ruleName><elementIndex> ); +!> +<endif> +>> + +matchRuleBlockSet ::= matchSet + +matchSetAndListLabel(s,label,elementIndex,postmatchCode) ::= << +<matchSet(...)> +<listLabel(elem=label,...)> +>> + +/** Match a string literal */ +lexerStringRef(string,label,elementIndex) ::= << +<if(label)> +ANTLR_MARKER <label>Start = this->getCharIndex(); +ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); +ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); +this->matchs(<string>); +<checkRuleBacktrackFailure()> +<label> = new CommonTokenType; +<label>->set_type( CommonTokenType::TOKEN_INVALID ); +<label>->set_startIndex( <label>Start); +<label>->set_stopIndex( this->getCharIndex()-1); +<label>->set_input( this->get_input() ); +<label>->set_line( <label>StartLine<elementIndex> ); +<label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); +<else> +this->matchs(<string>); +<checkRuleBacktrackFailure()><\n> +<endif> +>> + +wildcard(token,label,elementIndex,terminalOptions) ::= << +<if(label)> +<label>=(<labelType>)this->LT(1);<\n> +<endif> +this->matchAnyToken(); +<checkRuleBacktrackFailure()> +>> + +wildcardAndListLabel(token,label,elementIndex,terminalOptions) ::= << +<wildcard(...)> +<listLabel(elem=label,...)> +>> + +/** Match . 
wildcard in lexer */ +wildcardChar(label, elementIndex) ::= << +<if(label)> +<label> = this->LA(1);<\n> +<endif> +this->matchAny(); +<checkRuleBacktrackFailure()> +>> + +wildcardCharListLabel(label, elementIndex) ::= << +<wildcardChar(...)> +<listLabel(elem=label,...)> +>> + +/** Match a rule reference by invoking it possibly with arguments + * and a return value or values. The 'rule' argument was the + * target rule name, but now is type Rule, whose toString is + * same: the rule name. Now though you can access full rule + * descriptor stuff. + */ +ruleRef(rule,label,elementIndex,args,scope) ::= << +this->followPush(FOLLOW_<rule.name>_in_<ruleName><elementIndex>); +res = <rule.name>(<if(args)><args; separator=", "><endif>); +if (res) { + <ruleAssert()> + <ruleResult()> +} +this->followPop(); +<checkRuleBacktrackFailure()> +>> + +ruleAssert() ::= << +<if(!ruleDescriptor.isSynPred)> +Y_ASSERT(dynamic_cast\<@ANTLR_PACKAGE_NAME@::TRule_<rule.name>* >(res)); +<endif> +>> + +ruleResult() ::= << +<if(!ruleDescriptor.isSynPred)> Y_VERIFY(parent, "Parent is null"); -parent->unsafe_arena_set_allocated_rule_<rule.name><elemId>(static_cast\<@ANTLR_PACKAGE_NAME@::TRule_<rule.name>* >(res)); -<endif> ->> - -/** ids+=r */ -ruleRefAndListLabel(rule,label,elementIndex,args,scope) ::= << -<ruleRef(...)> -<listLabel(elem=label,...)> ->> - -/** A lexer rule reference - * The 'rule' argument was the target rule name, but now - * is type Rule, whose toString is same: the rule name. - * Now though you can access full rule descriptor stuff. 
- */ -lexerRuleRef(rule,label,args,elementIndex,scope) ::= << -/* <description> */ -<if(label)> -{ - ANTLR_MARKER <label>Start<elementIndex> = this->getCharIndex(); - ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); - ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); - <if(scope)>m_<scope:delegateName()>-><endif>m<rule.name>(<if(scope)>m_<scope:delegateName()><endif> <if(args)>, <endif><args; separator=", ">); - <checkRuleBacktrackFailure()> - <label> = new CommonTokenType(); - <label>->set_type( CommonTokenType::TOKEN_INVALID); - <label>->set_startIndex( <label>Start<elementIndex> ); - <label>->set_stopIndex( this->getCharIndex()-1 ); - <label>->set_input( this->get_input() ); - <label>->set_line( <label>StartLine<elementIndex> ); - <label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); -} -<else> -<if(scope)>m_<scope:delegateName()>-><endif>m<rule.name>(<args; separator=", ">); -<checkRuleBacktrackFailure()> -<endif> ->> - -/** i+=INT in lexer */ -lexerRuleRefAndListLabel(rule,label,args,elementIndex,scope) ::= << -<lexerRuleRef(...)> -<listLabel(elem=label,...)> ->> - -/** EOF in the lexer */ -lexerMatchEOF(label,elementIndex) ::= << -<if(label)> -{ - ANTLR_UINT32 <label>Start<elementIndex>; - ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); - ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); - <labelType> <label>; - <label>Start<elementIndex> = this->getCharIndex(); - this->matchc(ANTLR_CHARSTREAM_EOF); - <checkRuleBacktrackFailure()> - <label> = new CommonTokenType(); - <label>->set_type( CommonTokenType::TOKEN_EOF ); - <label>->set_startIndex(<label>Start<elementIndex>); - <label>->set_stopIndex(this->getCharIndex()-1); - <label>->set_input( this->get_input() ); - <label>->set_line( <label>StartLine<elementIndex> ); - <label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); -} -<else> - this->matchc(ANTLR_CHARSTREAM_EOF); - 
<checkRuleBacktrackFailure()> - <endif> ->> - -// used for left-recursive rules -recRuleDefArg() ::= "int <recRuleArg()>" -recRuleArg() ::= "_p" -recRuleAltPredicate(ruleName,opPrec) ::= "<recRuleArg()> \<= <opPrec>" -recRuleSetResultAction() ::= "root_0=$<ruleName>_primary.tree;" -recRuleSetReturnAction(src,name) ::= "$<name>=$<src>.<name>;" - -/** match ^(root children) in tree parser */ -tree(root, actionsAfterRoot, children, nullableChildList, enclosingTreeLevel, treeLevel) ::= << -<root:element()> -<actionsAfterRoot:element()> -<if(nullableChildList)> -if ( this->LA(1)== CommonTokenType::TOKEN_DOWN ) { - this->matchToken(CommonTokenType::TOKEN_DOWN, NULL); - <checkRuleBacktrackFailure()> - <children:element()> - this->matchToken(CommonTokenType::TOKEN_UP, NULL); - <checkRuleBacktrackFailure()> -} -<else> -this->matchToken(CommonTokenType::TOKEN_DOWN, NULL); -<checkRuleBacktrackFailure()> -<children:element()> -this->matchToken(CommonTokenType::TOKEN_UP, NULL); -<checkRuleBacktrackFailure()> -<endif> ->> - -/** Every predicate is used as a validating predicate (even when it is - * also hoisted into a prediction expression). 
- */ -validateSemanticPredicate(pred,description) ::= << -if ( !(<evalPredicate(...)>) ) -{ - <ruleBacktrackFailure()> - <newFPE(...)> -} ->> - -newFPE() ::= << - ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, FAILED_PREDICATE_EXCEPTION, StreamType>( this->get_rec(), "<description>" ); - ex->set_ruleName( "<ruleName>" ); - <\n> ->> - -// F i x e d D F A (if-then-else) - -dfaState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << - -{ - int LA<decisionNumber>_<stateNumber> = this->LA(<k>); - <edges; separator="\nelse "> - else - { -<if(eotPredictsAlt)> - alt<decisionNumber>=<eotPredictsAlt>; -<else> - <ruleBacktrackFailure()> - - <newNVException()> - goto rule<ruleDescriptor.name>Ex; - -<endif> - } -} ->> - -newNVException() ::= << -ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, NO_VIABLE_ALT_EXCEPTION, StreamType>( this->get_rec(), "<description>" ); -ex->set_decisionNum( <decisionNumber> ); -ex->set_state( <stateNumber> ); -<@noViableAltException()> -<\n> ->> - -/** Same as a normal DFA state except that we don't examine lookahead - * for the bypass alternative. It delays error detection but this - * is faster, smaller, and more what people expect. For (X)? people - * expect "if ( LA(1)==X ) match(X);" and that's it. - */ -dfaOptionalBlockState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << -{ - int LA<decisionNumber>_<stateNumber> = this->LA(<k>); - <edges; separator="\nelse "> -} ->> - -/** A DFA state that is actually the loopback decision of a closure - * loop. If end-of-token (EOT) predicts any of the targets then it - * should act like a default clause (i.e., no error can be generated). - * This is used only in the lexer so that for ('a')* on the end of a rule - * anything other than 'a' predicts exiting. 
- */ - -dfaLoopbackStateDecls()::= << -ANTLR_UINT32 LA<decisionNumber>_<stateNumber>; ->> -dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << -{ - /* dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) - */ - int LA<decisionNumber>_<stateNumber> = this->LA(<k>); - <edges; separator="\nelse "><\n> - <if(eotPredictsAlt)> - <if(!edges)> - alt<decisionNumber>=<eotPredictsAlt>; <! if no edges, don't gen ELSE !> - <else> - else - { - alt<decisionNumber>=<eotPredictsAlt>; - }<\n> - <endif> - <endif> -} ->> - -/** An accept state indicates a unique alternative has been predicted */ -dfaAcceptState(alt) ::= "alt<decisionNumber>=<alt>;" - -/** A simple edge with an expression. If the expression is satisfied, - * enter to the target state. To handle gated productions, we may - * have to evaluate some predicates for this edge. - */ -dfaEdge(labelExpr, targetState, predicates) ::= << -if ( (<labelExpr>)<if(predicates)> && (<predicates>)<endif>) -{ - <targetState> -} ->> - -// F i x e d D F A (switch case) - -/** A DFA state where a SWITCH may be generated. The code generator - * decides if this is possible: CodeGenerator.canGenerateSwitch(). 
- */ -dfaStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << -switch ( this->LA(<k>) ) -{ -<edges; separator="\n"> - -default: -<if(eotPredictsAlt)> - alt<decisionNumber>=<eotPredictsAlt>; -<else> - <ruleBacktrackFailure()> - <newNVException()> - goto rule<ruleDescriptor.name>Ex;<\n> -<endif> -}<\n> ->> - -dfaOptionalBlockStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << -switch ( this->LA(<k>) ) -{ - <edges; separator="\n"> -}<\n> ->> - -dfaLoopbackStateSwitch(k, edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << -switch ( this->LA(<k>) ) -{ -<edges; separator="\n"><\n> -<if(eotPredictsAlt)> -default: - alt<decisionNumber>=<eotPredictsAlt>; - break;<\n> -<endif> -}<\n> ->> - -dfaEdgeSwitch(labels, targetState) ::= << -<labels:{it |case <if(PARSER)>TOKEN_<endif><it>:}; separator="\n"> - { - <targetState> - } - break; ->> - -// C y c l i c D F A - -/** The code to initiate execution of a cyclic DFA; this is used - * in the rule to predict an alt just like the fixed DFA case. - * The <name> attribute is inherited via the parser, lexer, ... - */ -dfaDecision(decisionNumber,description) ::= << -alt<decisionNumber> = cdfa<decisionNumber>.predict(this, this->get_rec(), this->get_istream(), cdfa<decisionNumber> ); -<checkRuleBacktrackFailure()> ->> - -/* Dump DFA tables as static initialized arrays of shorts(16 bits)/characters(8 bits) - * which are then used to statically initialize the dfa structure, which means that there - * is no runtime initialization whatsoever, other than anything the C compiler might - * need to generate. In general the C compiler will lay out memory such that there is no - * runtime code required. 
- */ -cyclicDFA(dfa) ::= << -/** Static dfa state tables for Cyclic dfa: - * <dfa.description> - */ -static const ANTLR_INT32 dfa<dfa.decisionNumber>_eot[<dfa.numberOfStates>] = - { - <dfa.eot; wrap="\n", separator=",", null="-1"> - }; -static const ANTLR_INT32 dfa<dfa.decisionNumber>_eof[<dfa.numberOfStates>] = - { - <dfa.eof; wrap="\n", separator=",", null="-1"> - }; -static const ANTLR_INT32 dfa<dfa.decisionNumber>_min[<dfa.numberOfStates>] = - { - <dfa.min; wrap="\n", separator=",", null="-1"> - }; -static const ANTLR_INT32 dfa<dfa.decisionNumber>_max[<dfa.numberOfStates>] = - { - <dfa.max; wrap="\n", separator=",", null="-1"> - }; -static const ANTLR_INT32 dfa<dfa.decisionNumber>_accept[<dfa.numberOfStates>] = - { - <dfa.accept; wrap="\n", separator=",", null="-1"> - }; -static const ANTLR_INT32 dfa<dfa.decisionNumber>_special[<dfa.numberOfStates>] = - { - <dfa.special; wrap="\n", separator=",", null="-1"> - }; - -/** Used when there is no transition table entry for a particular state */ -static const ANTLR_INT32* dfa<dfa.decisionNumber>_T_empty = NULL; - -<dfa.edgeTransitionClassMap.keys:{ table | -static const ANTLR_INT32 dfa<dfa.decisionNumber>_T<i0>[] = - { - <table; separator=",", wrap="\n", null="-1"> - \};<\n>}; null = ""> - -/* Transition tables are a table of sub tables, with some tables - * reused for efficiency. 
- */ -static const ANTLR_INT32 * const dfa<dfa.decisionNumber>_transitions[] = -{ - <dfa.transitionEdgeTables:{xref|dfa<dfa.decisionNumber>_T<xref>}; separator=",", wrap="\n", null="NULL"> -}; - -<@errorMethod()> - -/* Declare tracking structure for Cyclic DFA <dfa.decisionNumber> - */ -class <name>CyclicDFA<dfa.decisionNumber> : public CyclicDFA\< <name>ImplTraits, <name> >, public <name>Tokens -{ -public: - typedef CyclicDFA\< <name>ImplTraits, <name> > BaseType; - typedef BaseType::ContextType CtxType; - -private: -<if(dfa.specialStateSTs)> - //to maintain C-Target compatibility, we need to make some of ctx functions look like member funcs - CtxType* m_ctx; -<endif> - -public: - <name>CyclicDFA<dfa.decisionNumber>( ANTLR_INT32 decisionNumber - , const ANTLR_UCHAR* description - , const ANTLR_INT32* const eot - , const ANTLR_INT32* const eof - , const ANTLR_INT32* const min - , const ANTLR_INT32* const max - , const ANTLR_INT32* const accept - , const ANTLR_INT32* const special - , const ANTLR_INT32* const *const transition) - :BaseType( decisionNumber, description, eot, eof, min, max, accept, - special, transition ) - { - <if(dfa.specialStateSTs)> - m_ctx = NULL; - <endif> - } - - <if(dfa.specialStateSTs)> - ANTLR_UINT32 LA(ANTLR_INT32 i) - { - return m_ctx->LA(i); - } - - <if(PARSER)> - const CtxType::CommonTokenType* LT(ANTLR_INT32 k) - { - return m_ctx->LT(k); - } - <endif> - <if(synpreds)> - template\<typename PredType> - bool msynpred( PredType pred ) - { - return m_ctx->msynpred(pred); - } - <endif> - - ANTLR_INT32 specialStateTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s) - { - ANTLR_INT32 _s; - - m_ctx = ctx; - _s = s; - switch (s) - { - <dfa.specialStateSTs:{state | - case <i0>: - - <state>}; separator="\n"> - } - <if(backtracking)> - if ( ctx->get_backtracking() > 0) - { - ctx->set_failedflag( true ); - return -1; - } - <endif> - ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, 
NO_VIABLE_ALT_EXCEPTION, StreamType>( recognizer, "<dfa.description>" ); - ex->set_decisionNum( <dfa.decisionNumber> ); - ex->set_state(_s); - <@noViableAltException()> - return -1; - } - <endif> -}; - -static <name>CyclicDFA<dfa.decisionNumber> cdfa<dfa.decisionNumber>( - <dfa.decisionNumber>, /* Decision number of this dfa */ - /* Which decision this represents: */ - (const ANTLR_UCHAR*)"<dfa.description>", - dfa<dfa.decisionNumber>_eot, /* EOT table */ - dfa<dfa.decisionNumber>_eof, /* EOF table */ - dfa<dfa.decisionNumber>_min, /* Minimum tokens for each state */ - dfa<dfa.decisionNumber>_max, /* Maximum tokens for each state */ - dfa<dfa.decisionNumber>_accept, /* Accept table */ - dfa<dfa.decisionNumber>_special, /* Special transition states */ - dfa<dfa.decisionNumber>_transitions /* Table of transition tables */ - - ); - - -/* End of Cyclic DFA <dfa.decisionNumber> - * --------------------- - */ ->> - -/** A state in a cyclic DFA; it's a special state and part of a big switch on - * state. - */ -cyclicDFAState(decisionNumber,stateNumber,edges,needErrorClause,semPredState) ::= << -{ - ANTLR_UINT32 LA<decisionNumber>_<stateNumber>;<\n> - ANTLR_MARKER index<decisionNumber>_<stateNumber>;<\n> - - LA<decisionNumber>_<stateNumber> = ctx->LA(1);<\n> - <if(semPredState)> <! get next lookahead symbol to test edges, then rewind !> - index<decisionNumber>_<stateNumber> = ctx->index();<\n> - ctx->rewindLast();<\n> - <endif> - s = -1; +parent->unsafe_arena_set_allocated_rule_<rule.name><elemId>(static_cast\<@ANTLR_PACKAGE_NAME@::TRule_<rule.name>* >(res)); +<endif> +>> + +/** ids+=r */ +ruleRefAndListLabel(rule,label,elementIndex,args,scope) ::= << +<ruleRef(...)> +<listLabel(elem=label,...)> +>> + +/** A lexer rule reference + * The 'rule' argument was the target rule name, but now + * is type Rule, whose toString is same: the rule name. + * Now though you can access full rule descriptor stuff. 
+ */ +lexerRuleRef(rule,label,args,elementIndex,scope) ::= << +/* <description> */ +<if(label)> +{ + ANTLR_MARKER <label>Start<elementIndex> = this->getCharIndex(); + ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); + ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); + <if(scope)>m_<scope:delegateName()>-><endif>m<rule.name>(<if(scope)>m_<scope:delegateName()><endif> <if(args)>, <endif><args; separator=", ">); + <checkRuleBacktrackFailure()> + <label> = new CommonTokenType(); + <label>->set_type( CommonTokenType::TOKEN_INVALID); + <label>->set_startIndex( <label>Start<elementIndex> ); + <label>->set_stopIndex( this->getCharIndex()-1 ); + <label>->set_input( this->get_input() ); + <label>->set_line( <label>StartLine<elementIndex> ); + <label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); +} +<else> +<if(scope)>m_<scope:delegateName()>-><endif>m<rule.name>(<args; separator=", ">); +<checkRuleBacktrackFailure()> +<endif> +>> + +/** i+=INT in lexer */ +lexerRuleRefAndListLabel(rule,label,args,elementIndex,scope) ::= << +<lexerRuleRef(...)> +<listLabel(elem=label,...)> +>> + +/** EOF in the lexer */ +lexerMatchEOF(label,elementIndex) ::= << +<if(label)> +{ + ANTLR_UINT32 <label>Start<elementIndex>; + ANTLR_UINT32 <label>StartLine<elementIndex> = this->getLine(); + ANTLR_UINT32 <label>StartCharPos<elementIndex> = this->getCharPositionInLine(); + <labelType> <label>; + <label>Start<elementIndex> = this->getCharIndex(); + this->matchc(ANTLR_CHARSTREAM_EOF); + <checkRuleBacktrackFailure()> + <label> = new CommonTokenType(); + <label>->set_type( CommonTokenType::TOKEN_EOF ); + <label>->set_startIndex(<label>Start<elementIndex>); + <label>->set_stopIndex(this->getCharIndex()-1); + <label>->set_input( this->get_input() ); + <label>->set_line( <label>StartLine<elementIndex> ); + <label>->set_charPositionInLine( <label>StartCharPos<elementIndex> ); +} +<else> + this->matchc(ANTLR_CHARSTREAM_EOF); + 
<checkRuleBacktrackFailure()> + <endif> +>> + +// used for left-recursive rules +recRuleDefArg() ::= "int <recRuleArg()>" +recRuleArg() ::= "_p" +recRuleAltPredicate(ruleName,opPrec) ::= "<recRuleArg()> \<= <opPrec>" +recRuleSetResultAction() ::= "root_0=$<ruleName>_primary.tree;" +recRuleSetReturnAction(src,name) ::= "$<name>=$<src>.<name>;" + +/** match ^(root children) in tree parser */ +tree(root, actionsAfterRoot, children, nullableChildList, enclosingTreeLevel, treeLevel) ::= << +<root:element()> +<actionsAfterRoot:element()> +<if(nullableChildList)> +if ( this->LA(1)== CommonTokenType::TOKEN_DOWN ) { + this->matchToken(CommonTokenType::TOKEN_DOWN, NULL); + <checkRuleBacktrackFailure()> + <children:element()> + this->matchToken(CommonTokenType::TOKEN_UP, NULL); + <checkRuleBacktrackFailure()> +} +<else> +this->matchToken(CommonTokenType::TOKEN_DOWN, NULL); +<checkRuleBacktrackFailure()> +<children:element()> +this->matchToken(CommonTokenType::TOKEN_UP, NULL); +<checkRuleBacktrackFailure()> +<endif> +>> + +/** Every predicate is used as a validating predicate (even when it is + * also hoisted into a prediction expression). 
+ */ +validateSemanticPredicate(pred,description) ::= << +if ( !(<evalPredicate(...)>) ) +{ + <ruleBacktrackFailure()> + <newFPE(...)> +} +>> + +newFPE() ::= << + ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, FAILED_PREDICATE_EXCEPTION, StreamType>( this->get_rec(), "<description>" ); + ex->set_ruleName( "<ruleName>" ); + <\n> +>> + +// F i x e d D F A (if-then-else) + +dfaState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << + +{ + int LA<decisionNumber>_<stateNumber> = this->LA(<k>); + <edges; separator="\nelse "> + else + { +<if(eotPredictsAlt)> + alt<decisionNumber>=<eotPredictsAlt>; +<else> + <ruleBacktrackFailure()> + + <newNVException()> + goto rule<ruleDescriptor.name>Ex; + +<endif> + } +} +>> + +newNVException() ::= << +ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, NO_VIABLE_ALT_EXCEPTION, StreamType>( this->get_rec(), "<description>" ); +ex->set_decisionNum( <decisionNumber> ); +ex->set_state( <stateNumber> ); +<@noViableAltException()> +<\n> +>> + +/** Same as a normal DFA state except that we don't examine lookahead + * for the bypass alternative. It delays error detection but this + * is faster, smaller, and more what people expect. For (X)? people + * expect "if ( LA(1)==X ) match(X);" and that's it. + */ +dfaOptionalBlockState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << +{ + int LA<decisionNumber>_<stateNumber> = this->LA(<k>); + <edges; separator="\nelse "> +} +>> + +/** A DFA state that is actually the loopback decision of a closure + * loop. If end-of-token (EOT) predicts any of the targets then it + * should act like a default clause (i.e., no error can be generated). + * This is used only in the lexer so that for ('a')* on the end of a rule + * anything other than 'a' predicts exiting. 
+ */ + +dfaLoopbackStateDecls()::= << +ANTLR_UINT32 LA<decisionNumber>_<stateNumber>; +>> +dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << +{ + /* dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) + */ + int LA<decisionNumber>_<stateNumber> = this->LA(<k>); + <edges; separator="\nelse "><\n> + <if(eotPredictsAlt)> + <if(!edges)> + alt<decisionNumber>=<eotPredictsAlt>; <! if no edges, don't gen ELSE !> + <else> + else + { + alt<decisionNumber>=<eotPredictsAlt>; + }<\n> + <endif> + <endif> +} +>> + +/** An accept state indicates a unique alternative has been predicted */ +dfaAcceptState(alt) ::= "alt<decisionNumber>=<alt>;" + +/** A simple edge with an expression. If the expression is satisfied, + * enter to the target state. To handle gated productions, we may + * have to evaluate some predicates for this edge. + */ +dfaEdge(labelExpr, targetState, predicates) ::= << +if ( (<labelExpr>)<if(predicates)> && (<predicates>)<endif>) +{ + <targetState> +} +>> + +// F i x e d D F A (switch case) + +/** A DFA state where a SWITCH may be generated. The code generator + * decides if this is possible: CodeGenerator.canGenerateSwitch(). 
+ */ +dfaStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << +switch ( this->LA(<k>) ) +{ +<edges; separator="\n"> + +default: +<if(eotPredictsAlt)> + alt<decisionNumber>=<eotPredictsAlt>; +<else> + <ruleBacktrackFailure()> + <newNVException()> + goto rule<ruleDescriptor.name>Ex;<\n> +<endif> +}<\n> +>> + +dfaOptionalBlockStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << +switch ( this->LA(<k>) ) +{ + <edges; separator="\n"> +}<\n> +>> + +dfaLoopbackStateSwitch(k, edges,eotPredictsAlt,description,stateNumber,semPredState) ::= << +switch ( this->LA(<k>) ) +{ +<edges; separator="\n"><\n> +<if(eotPredictsAlt)> +default: + alt<decisionNumber>=<eotPredictsAlt>; + break;<\n> +<endif> +}<\n> +>> + +dfaEdgeSwitch(labels, targetState) ::= << +<labels:{it |case <if(PARSER)>TOKEN_<endif><it>:}; separator="\n"> + { + <targetState> + } + break; +>> + +// C y c l i c D F A + +/** The code to initiate execution of a cyclic DFA; this is used + * in the rule to predict an alt just like the fixed DFA case. + * The <name> attribute is inherited via the parser, lexer, ... + */ +dfaDecision(decisionNumber,description) ::= << +alt<decisionNumber> = cdfa<decisionNumber>.predict(this, this->get_rec(), this->get_istream(), cdfa<decisionNumber> ); +<checkRuleBacktrackFailure()> +>> + +/* Dump DFA tables as static initialized arrays of shorts(16 bits)/characters(8 bits) + * which are then used to statically initialize the dfa structure, which means that there + * is no runtime initialization whatsoever, other than anything the C compiler might + * need to generate. In general the C compiler will lay out memory such that there is no + * runtime code required. 
+ */ +cyclicDFA(dfa) ::= << +/** Static dfa state tables for Cyclic dfa: + * <dfa.description> + */ +static const ANTLR_INT32 dfa<dfa.decisionNumber>_eot[<dfa.numberOfStates>] = + { + <dfa.eot; wrap="\n", separator=",", null="-1"> + }; +static const ANTLR_INT32 dfa<dfa.decisionNumber>_eof[<dfa.numberOfStates>] = + { + <dfa.eof; wrap="\n", separator=",", null="-1"> + }; +static const ANTLR_INT32 dfa<dfa.decisionNumber>_min[<dfa.numberOfStates>] = + { + <dfa.min; wrap="\n", separator=",", null="-1"> + }; +static const ANTLR_INT32 dfa<dfa.decisionNumber>_max[<dfa.numberOfStates>] = + { + <dfa.max; wrap="\n", separator=",", null="-1"> + }; +static const ANTLR_INT32 dfa<dfa.decisionNumber>_accept[<dfa.numberOfStates>] = + { + <dfa.accept; wrap="\n", separator=",", null="-1"> + }; +static const ANTLR_INT32 dfa<dfa.decisionNumber>_special[<dfa.numberOfStates>] = + { + <dfa.special; wrap="\n", separator=",", null="-1"> + }; + +/** Used when there is no transition table entry for a particular state */ +static const ANTLR_INT32* dfa<dfa.decisionNumber>_T_empty = NULL; + +<dfa.edgeTransitionClassMap.keys:{ table | +static const ANTLR_INT32 dfa<dfa.decisionNumber>_T<i0>[] = + { + <table; separator=",", wrap="\n", null="-1"> + \};<\n>}; null = ""> + +/* Transition tables are a table of sub tables, with some tables + * reused for efficiency. 
+ */ +static const ANTLR_INT32 * const dfa<dfa.decisionNumber>_transitions[] = +{ + <dfa.transitionEdgeTables:{xref|dfa<dfa.decisionNumber>_T<xref>}; separator=",", wrap="\n", null="NULL"> +}; + +<@errorMethod()> + +/* Declare tracking structure for Cyclic DFA <dfa.decisionNumber> + */ +class <name>CyclicDFA<dfa.decisionNumber> : public CyclicDFA\< <name>ImplTraits, <name> >, public <name>Tokens +{ +public: + typedef CyclicDFA\< <name>ImplTraits, <name> > BaseType; + typedef BaseType::ContextType CtxType; + +private: +<if(dfa.specialStateSTs)> + //to maintain C-Target compatibility, we need to make some of ctx functions look like member funcs + CtxType* m_ctx; +<endif> + +public: + <name>CyclicDFA<dfa.decisionNumber>( ANTLR_INT32 decisionNumber + , const ANTLR_UCHAR* description + , const ANTLR_INT32* const eot + , const ANTLR_INT32* const eof + , const ANTLR_INT32* const min + , const ANTLR_INT32* const max + , const ANTLR_INT32* const accept + , const ANTLR_INT32* const special + , const ANTLR_INT32* const *const transition) + :BaseType( decisionNumber, description, eot, eof, min, max, accept, + special, transition ) + { + <if(dfa.specialStateSTs)> + m_ctx = NULL; + <endif> + } + + <if(dfa.specialStateSTs)> + ANTLR_UINT32 LA(ANTLR_INT32 i) + { + return m_ctx->LA(i); + } + + <if(PARSER)> + const CtxType::CommonTokenType* LT(ANTLR_INT32 k) + { + return m_ctx->LT(k); + } + <endif> + <if(synpreds)> + template\<typename PredType> + bool msynpred( PredType pred ) + { + return m_ctx->msynpred(pred); + } + <endif> + + ANTLR_INT32 specialStateTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s) + { + ANTLR_INT32 _s; + + m_ctx = ctx; + _s = s; + switch (s) + { + <dfa.specialStateSTs:{state | + case <i0>: + + <state>}; separator="\n"> + } + <if(backtracking)> + if ( ctx->get_backtracking() > 0) + { + ctx->set_failedflag( true ); + return -1; + } + <endif> + ExceptionBaseType* ex = new ANTLR_Exception\< <name>ImplTraits, 
NO_VIABLE_ALT_EXCEPTION, StreamType>( recognizer, "<dfa.description>" ); + ex->set_decisionNum( <dfa.decisionNumber> ); + ex->set_state(_s); + <@noViableAltException()> + return -1; + } + <endif> +}; + +static <name>CyclicDFA<dfa.decisionNumber> cdfa<dfa.decisionNumber>( + <dfa.decisionNumber>, /* Decision number of this dfa */ + /* Which decision this represents: */ + (const ANTLR_UCHAR*)"<dfa.description>", + dfa<dfa.decisionNumber>_eot, /* EOT table */ + dfa<dfa.decisionNumber>_eof, /* EOF table */ + dfa<dfa.decisionNumber>_min, /* Minimum tokens for each state */ + dfa<dfa.decisionNumber>_max, /* Maximum tokens for each state */ + dfa<dfa.decisionNumber>_accept, /* Accept table */ + dfa<dfa.decisionNumber>_special, /* Special transition states */ + dfa<dfa.decisionNumber>_transitions /* Table of transition tables */ + + ); + + +/* End of Cyclic DFA <dfa.decisionNumber> + * --------------------- + */ +>> + +/** A state in a cyclic DFA; it's a special state and part of a big switch on + * state. + */ +cyclicDFAState(decisionNumber,stateNumber,edges,needErrorClause,semPredState) ::= << +{ + ANTLR_UINT32 LA<decisionNumber>_<stateNumber>;<\n> + ANTLR_MARKER index<decisionNumber>_<stateNumber>;<\n> + + LA<decisionNumber>_<stateNumber> = ctx->LA(1);<\n> + <if(semPredState)> <! get next lookahead symbol to test edges, then rewind !> + index<decisionNumber>_<stateNumber> = ctx->index();<\n> + ctx->rewindLast();<\n> + <endif> + s = -1; [&]() -> void { <edges; separator="\nelse "> }(); - <if(semPredState)> <! return input cursor to state before we rewound !> - ctx->seek(index<decisionNumber>_<stateNumber>);<\n> - <endif> - if ( s>=0 ) - { - return s; - } -} -break; ->> - -/** Just like a fixed DFA edge, test the lookahead and indicate what - * state to jump to next if successful. 
- */ -cyclicDFAEdge(labelExpr, targetStateNumber, edgeNumber, predicates) ::= << -if ( (<labelExpr>) <if(predicates)>&& (<predicates>)<endif> ) -{ - s = <targetStateNumber>; -}<\n> ->> - -/** An edge pointing at end-of-token; essentially matches any char; - * always jump to the target. - */ -eotDFAEdge(targetStateNumber,edgeNumber, predicates) ::= << - s = <targetStateNumber>;<\n> ->> - - -// D F A E X P R E S S I O N S - -andPredicates(left,right) ::= "( (<left>) && (<right>) )" - -orPredicates(operands) ::= "(<operands:{o|(<o>)}; separator=\"||\">)" - -notPredicate(pred) ::= "!( <evalPredicate(pred,{})> )" - -evalPredicate(pred,description) ::= "(<pred>)" - -evalSynPredicate(pred,description) ::= "this->msynpred( antlr3::ClassForwarder\<<pred>>() )" - -lookaheadTest(atom,k,atomAsInt) ::= "LA<decisionNumber>_<stateNumber> == <if(PARSER)>TOKEN_<endif><atom>" - -/** Sometimes a lookahead test cannot assume that LA(k) is in a temp variable - * somewhere. Must ask for the lookahead directly. - */ -isolatedLookaheadTest(atom,k,atomAsInt) ::= "this->LA(<k>) == <if(PARSER)>TOKEN_<endif><atom>" - -lookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= <% + <if(semPredState)> <! return input cursor to state before we rewound !> + ctx->seek(index<decisionNumber>_<stateNumber>);<\n> + <endif> + if ( s>=0 ) + { + return s; + } +} +break; +>> + +/** Just like a fixed DFA edge, test the lookahead and indicate what + * state to jump to next if successful. + */ +cyclicDFAEdge(labelExpr, targetStateNumber, edgeNumber, predicates) ::= << +if ( (<labelExpr>) <if(predicates)>&& (<predicates>)<endif> ) +{ + s = <targetStateNumber>; +}<\n> +>> + +/** An edge pointing at end-of-token; essentially matches any char; + * always jump to the target. 
+ */ +eotDFAEdge(targetStateNumber,edgeNumber, predicates) ::= << + s = <targetStateNumber>;<\n> +>> + + +// D F A E X P R E S S I O N S + +andPredicates(left,right) ::= "( (<left>) && (<right>) )" + +orPredicates(operands) ::= "(<operands:{o|(<o>)}; separator=\"||\">)" + +notPredicate(pred) ::= "!( <evalPredicate(pred,{})> )" + +evalPredicate(pred,description) ::= "(<pred>)" + +evalSynPredicate(pred,description) ::= "this->msynpred( antlr3::ClassForwarder\<<pred>>() )" + +lookaheadTest(atom,k,atomAsInt) ::= "LA<decisionNumber>_<stateNumber> == <if(PARSER)>TOKEN_<endif><atom>" + +/** Sometimes a lookahead test cannot assume that LA(k) is in a temp variable + * somewhere. Must ask for the lookahead directly. + */ +isolatedLookaheadTest(atom,k,atomAsInt) ::= "this->LA(<k>) == <if(PARSER)>TOKEN_<endif><atom>" + +lookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= <% IsBetween(LA<decisionNumber>_<stateNumber>, <if(PARSER)>TOKEN_<endif><lower>, <if(PARSER)>TOKEN_<endif><upper>) -%> - +%> + isolatedLookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= "IsBetween(this->LA(<k>), <if(PARSER)>TOKEN_<endif><lower>, <if(PARSER)>TOKEN_<endif><upper>)" - -setTest(ranges) ::= "<ranges; separator=\" || \">" - -// A T T R I B U T E S - -makeScopeSet() ::= << -/* makeScopeSet() - */ - /** Definition of the <scope.name> scope variable tracking - * structure. An instance of this structure is created by calling - * <name>_<scope.name>Push(). - */ -struct <scopeStruct(sname=scope.name,...)> -{ - /* ============================================================================= - * Programmer defined variables... 
- */ - <scope.attributes:{it |<it.decl>;}; separator="\n"> - - /* End of programmer defined variables - * ============================================================================= - */ -}; - ->> - -globalAttributeScopeDecl(scope) ::= << -<if(scope.attributes)> -/* globalAttributeScopeDecl(scope) - */ -<makeScopeSet(...)> -<endif> ->> - -ruleAttributeScopeDecl(scope) ::= << -<if(scope.attributes)> -/* ruleAttributeScopeDecl(scope) - */ -<makeScopeSet(...)> -<endif> ->> - -globalAttributeScopeDef(scope) ::= -<< -/* globalAttributeScopeDef(scope) - */ -<if(scope.attributes)> - -StackType\< <scopeStruct(sname=scope.name)> > <scopeStack(sname=scope.name)>; - -<endif> ->> - -ruleAttributeScopeDef(scope) ::= << -<if(scope.attributes)> -/* ruleAttributeScopeDef(scope) - */ -StackType\< <scopeStruct(sname=scope.name)> > <scopeStack(sname=scope.name,...)>; - -<endif> ->> - -scopeStruct(sname) ::= << -<sname>Scope ->> - -scopeStack(sname) ::= << -m_<sname>_stack ->> - -returnType() ::= <% -<if(!ruleDescriptor.isSynPred)> -RuleReturnType -<else> -bool -<endif> -%> - -/** Generate the C type associated with a single or multiple return - * value(s). - */ -ruleLabelType(referencedRule) ::= <% -RuleReturnType -%> - -delegateName(d) ::= << -<if(d.label)><d.label><else>g<d.name><endif> ->> - -/** Using a type to init value map, try to init a type; if not in table - * must be an object, default value is "0". - */ -initValue(typeName) ::= << - = <cTypeInitMap.(typeName)> ->> - -/** Define a rule label */ -ruleLabelDef(label) ::= << -<ruleLabelType(referencedRule=label.referencedRule)> <label.label.text>; ->> -/** Rule label default value */ -ruleLabelInitVal(label) ::= << ->> - -ASTLabelType() ::= "<if(recognizer.ASTLabelType)><recognizer.ASTLabelType><else>ImplTraits::TreeType*<endif>" - -/** Define a return struct for a rule if the code needs to access its - * start/stop tokens, tree stuff, attributes, ... Leave a hole for - * subgroups to stick in members. 
- */ -returnScope(scope) ::= "" - -parameterScope(scope) ::= << -<scope.attributes:{it |<it.decl>}; separator=", "> ->> - -parameterAttributeRef(attr) ::= "<attr.name>" -parameterSetAttributeRef(attr,expr) ::= "<attr.name>=<expr>;" - -/** Note that the scopeAttributeRef does not have access to the - * grammar name directly - */ -scopeAttributeRef(scope,attr,index,negIndex) ::= <% -<if(negIndex)> - m_<scope>_stack.at( m_<scope>_stack.size()-<negIndex>-1).<attr.name> -<else> -<if(index)> - m_<scope>_stack.at(<index>).<attr.name> -<else> - m_<scope>_stack.peek().<attr.name> -<endif> -<endif> -%> - -scopeSetAttributeRef(scope,attr,expr,index,negIndex) ::= <% -<if(negIndex)> - m_<scope>_stack.at( m_<scope>_stack.size()-<negIndex>-1).<attr.name> = <expr>; -<else> -<if(index)> - m_<scope>_stack.at(<index>).<attr.name> = <expr>; -<else> - m_<scope>_stack.peek().<attr.name> =<expr>; -<endif> -<endif> -%> - -/** $x is either global scope or x is rule with dynamic scope; refers - * to stack itself not top of stack. This is useful for predicates - * like {$function.size()>0 && $function::name.equals("foo")}? 
- */ -isolatedDynamicScopeRef(scope) ::= "<scope>_stack" - -/** reference an attribute of rule; might only have single return value */ -ruleLabelRef(referencedRule,scope,attr) ::= << -<if(referencedRule.hasMultipleReturnValues)> -<scope>.<attr.name> -<else> -<scope> -<endif> ->> - -returnAttributeRef(ruleDescriptor,attr) ::= "//--> returnAttributeRef" -returnSetAttributeRef(ruleDescriptor,attr,expr) ::= "//--> returnSetAttributeRef" - -/** How to translate $tokenLabel */ -tokenLabelRef(label) ::= "<label>" - -/** ids+=ID {$ids} or e+=expr {$e} */ -listLabelRef(label) ::= "list_<label>" - - -// not sure the next are the right approach -// -tokenLabelPropertyRef_text(scope,attr) ::= "(<scope>->getText())" -tokenLabelPropertyRef_type(scope,attr) ::= "(<scope>->get_type())" -tokenLabelPropertyRef_line(scope,attr) ::= "(<scope>->get_line())" -tokenLabelPropertyRef_pos(scope,attr) ::= "(<scope>->get_charPositionInLine())" -tokenLabelPropertyRef_channel(scope,attr) ::= "(<scope>->get_channel())" -tokenLabelPropertyRef_index(scope,attr) ::= "(<scope>->get_tokenIndex())" -tokenLabelPropertyRef_tree(scope,attr) ::= "(<scope>->get_tree())" -tokenLabelPropertyRef_int(scope,attr) ::= "(<name>ImplTraits::ConvertToInt32(<scope>->getText()))" - -ruleLabelPropertyRef_start(scope,attr) ::= "(<scope>.start)" -ruleLabelPropertyRef_stop(scope,attr) ::= "(<scope>.stop)" -ruleLabelPropertyRef_tree(scope,attr) ::= "(<scope>.tree)" -ruleLabelPropertyRef_text(scope,attr) ::= << -<if(TREE_PARSER)> -(this->get_strstream()->toStringSS(<scope>.start, <scope>.start)) -<else> -(this->get_strstream()->toStringTT(<scope>.start, <scope>.stop)) -<endif> ->> - -ruleLabelPropertyRef_st(scope,attr) ::= "<scope>.st" - -/** Isolated $RULE ref ok in lexer as it's a Token */ -lexerRuleLabel(label) ::= "<label>" - -lexerRuleLabelPropertyRef_type(scope,attr) ::= "(<scope>->get_type())" -lexerRuleLabelPropertyRef_line(scope,attr) ::= "(<scope>->get_line())" -lexerRuleLabelPropertyRef_pos(scope,attr) ::= 
"(<scope>->get_charPositionInLine())" -lexerRuleLabelPropertyRef_channel(scope,attr) ::= "(<scope>->get_channel())" -lexerRuleLabelPropertyRef_index(scope,attr) ::= "(<scope>->get_tokenIndex())" -lexerRuleLabelPropertyRef_text(scope,attr) ::= "(<scope>->getText())" - -// Somebody may ref $template or $tree or $stop within a rule: -rulePropertyRef_start(scope,attr) ::= "//--> rulePropertyRef_start" -rulePropertyRef_stop(scope,attr) ::= "//--> rulePropertyRef_stop" -rulePropertyRef_tree(scope,attr) ::= "//--> rulePropertyRef_tree" -rulePropertyRef_text(scope,attr) ::= "//--> rulePropertyRef_text" -rulePropertyRef_st(scope,attr) ::= "//--> rulePropertyRef_st" - -lexerRulePropertyRef_text(scope,attr) ::= "this->getText()" -lexerRulePropertyRef_type(scope,attr) ::= "_type" -lexerRulePropertyRef_line(scope,attr) ::= "this->get_state()->get_tokenStartLine()" -lexerRulePropertyRef_pos(scope,attr) ::= "this->get_state()->get_tokenStartCharPositionInLine()" -lexerRulePropertyRef_channel(scope,attr) ::= "this->get_state()->get_channel()" -lexerRulePropertyRef_start(scope,attr) ::= "this->get_state()->get_tokenStartCharIndex()" -lexerRulePropertyRef_stop(scope,attr) ::= "(this->getCharIndex()-1)" -lexerRulePropertyRef_index(scope,attr) ::= "-1" // undefined token index in lexer -lexerRulePropertyRef_int(scope,attr) ::= "(<name>ImplTraits::ConvertToInt32(<scope>->getText()))" - - -// setting $st and $tree is allowed in local rule. everything else is flagged as error -ruleSetPropertyRef_tree(scope,attr,expr) ::= "//--> ruleSetPropertyRef_tree" -ruleSetPropertyRef_st(scope,attr,expr) ::= "//--> ruleSetPropertyRef_st" - - -/** How to deal with an @after for C targets. Because we cannot rely on - * any garbage collection, after code is executed even in backtracking - * mode. Must be documented clearly. - */ -execAfter(action) ::= << -{ - <action> -} ->> - -/** How to execute an action (when not backtracking) */ -execAction(action) ::= "" - -// M I S C (properties, etc...) 
- -bitsetDeclare(bitsetname, words64, traits) ::= << - -/** Bitset defining follow set for error recovery in rule state: <name> */ -static ANTLR_BITWORD <bitsetname>_bits[] = { <words64:{it |ANTLR_UINT64_LIT(<it>)}; separator=", "> }; -static <traits>::BitsetListType <bitsetname>( <bitsetname>_bits, <length(words64)> ); ->> - -codeFileExtension() ::= ".cpp" - -true_value() ::= "true" -false_value() ::= "false" - + +setTest(ranges) ::= "<ranges; separator=\" || \">" + +// A T T R I B U T E S + +makeScopeSet() ::= << +/* makeScopeSet() + */ + /** Definition of the <scope.name> scope variable tracking + * structure. An instance of this structure is created by calling + * <name>_<scope.name>Push(). + */ +struct <scopeStruct(sname=scope.name,...)> +{ + /* ============================================================================= + * Programmer defined variables... + */ + <scope.attributes:{it |<it.decl>;}; separator="\n"> + + /* End of programmer defined variables + * ============================================================================= + */ +}; + +>> + +globalAttributeScopeDecl(scope) ::= << +<if(scope.attributes)> +/* globalAttributeScopeDecl(scope) + */ +<makeScopeSet(...)> +<endif> +>> + +ruleAttributeScopeDecl(scope) ::= << +<if(scope.attributes)> +/* ruleAttributeScopeDecl(scope) + */ +<makeScopeSet(...)> +<endif> +>> + +globalAttributeScopeDef(scope) ::= +<< +/* globalAttributeScopeDef(scope) + */ +<if(scope.attributes)> + +StackType\< <scopeStruct(sname=scope.name)> > <scopeStack(sname=scope.name)>; + +<endif> +>> + +ruleAttributeScopeDef(scope) ::= << +<if(scope.attributes)> +/* ruleAttributeScopeDef(scope) + */ +StackType\< <scopeStruct(sname=scope.name)> > <scopeStack(sname=scope.name,...)>; + +<endif> +>> + +scopeStruct(sname) ::= << +<sname>Scope +>> + +scopeStack(sname) ::= << +m_<sname>_stack +>> + +returnType() ::= <% +<if(!ruleDescriptor.isSynPred)> +RuleReturnType +<else> +bool +<endif> +%> + +/** Generate the C type associated with a single 
or multiple return + * value(s). + */ +ruleLabelType(referencedRule) ::= <% +RuleReturnType +%> + +delegateName(d) ::= << +<if(d.label)><d.label><else>g<d.name><endif> +>> + +/** Using a type to init value map, try to init a type; if not in table + * must be an object, default value is "0". + */ +initValue(typeName) ::= << + = <cTypeInitMap.(typeName)> +>> + +/** Define a rule label */ +ruleLabelDef(label) ::= << +<ruleLabelType(referencedRule=label.referencedRule)> <label.label.text>; +>> +/** Rule label default value */ +ruleLabelInitVal(label) ::= << +>> + +ASTLabelType() ::= "<if(recognizer.ASTLabelType)><recognizer.ASTLabelType><else>ImplTraits::TreeType*<endif>" + +/** Define a return struct for a rule if the code needs to access its + * start/stop tokens, tree stuff, attributes, ... Leave a hole for + * subgroups to stick in members. + */ +returnScope(scope) ::= "" + +parameterScope(scope) ::= << +<scope.attributes:{it |<it.decl>}; separator=", "> +>> + +parameterAttributeRef(attr) ::= "<attr.name>" +parameterSetAttributeRef(attr,expr) ::= "<attr.name>=<expr>;" + +/** Note that the scopeAttributeRef does not have access to the + * grammar name directly + */ +scopeAttributeRef(scope,attr,index,negIndex) ::= <% +<if(negIndex)> + m_<scope>_stack.at( m_<scope>_stack.size()-<negIndex>-1).<attr.name> +<else> +<if(index)> + m_<scope>_stack.at(<index>).<attr.name> +<else> + m_<scope>_stack.peek().<attr.name> +<endif> +<endif> +%> + +scopeSetAttributeRef(scope,attr,expr,index,negIndex) ::= <% +<if(negIndex)> + m_<scope>_stack.at( m_<scope>_stack.size()-<negIndex>-1).<attr.name> = <expr>; +<else> +<if(index)> + m_<scope>_stack.at(<index>).<attr.name> = <expr>; +<else> + m_<scope>_stack.peek().<attr.name> =<expr>; +<endif> +<endif> +%> + +/** $x is either global scope or x is rule with dynamic scope; refers + * to stack itself not top of stack. This is useful for predicates + * like {$function.size()>0 && $function::name.equals("foo")}? 
+ */ +isolatedDynamicScopeRef(scope) ::= "<scope>_stack" + +/** reference an attribute of rule; might only have single return value */ +ruleLabelRef(referencedRule,scope,attr) ::= << +<if(referencedRule.hasMultipleReturnValues)> +<scope>.<attr.name> +<else> +<scope> +<endif> +>> + +returnAttributeRef(ruleDescriptor,attr) ::= "//--> returnAttributeRef" +returnSetAttributeRef(ruleDescriptor,attr,expr) ::= "//--> returnSetAttributeRef" + +/** How to translate $tokenLabel */ +tokenLabelRef(label) ::= "<label>" + +/** ids+=ID {$ids} or e+=expr {$e} */ +listLabelRef(label) ::= "list_<label>" + + +// not sure the next are the right approach +// +tokenLabelPropertyRef_text(scope,attr) ::= "(<scope>->getText())" +tokenLabelPropertyRef_type(scope,attr) ::= "(<scope>->get_type())" +tokenLabelPropertyRef_line(scope,attr) ::= "(<scope>->get_line())" +tokenLabelPropertyRef_pos(scope,attr) ::= "(<scope>->get_charPositionInLine())" +tokenLabelPropertyRef_channel(scope,attr) ::= "(<scope>->get_channel())" +tokenLabelPropertyRef_index(scope,attr) ::= "(<scope>->get_tokenIndex())" +tokenLabelPropertyRef_tree(scope,attr) ::= "(<scope>->get_tree())" +tokenLabelPropertyRef_int(scope,attr) ::= "(<name>ImplTraits::ConvertToInt32(<scope>->getText()))" + +ruleLabelPropertyRef_start(scope,attr) ::= "(<scope>.start)" +ruleLabelPropertyRef_stop(scope,attr) ::= "(<scope>.stop)" +ruleLabelPropertyRef_tree(scope,attr) ::= "(<scope>.tree)" +ruleLabelPropertyRef_text(scope,attr) ::= << +<if(TREE_PARSER)> +(this->get_strstream()->toStringSS(<scope>.start, <scope>.start)) +<else> +(this->get_strstream()->toStringTT(<scope>.start, <scope>.stop)) +<endif> +>> + +ruleLabelPropertyRef_st(scope,attr) ::= "<scope>.st" + +/** Isolated $RULE ref ok in lexer as it's a Token */ +lexerRuleLabel(label) ::= "<label>" + +lexerRuleLabelPropertyRef_type(scope,attr) ::= "(<scope>->get_type())" +lexerRuleLabelPropertyRef_line(scope,attr) ::= "(<scope>->get_line())" +lexerRuleLabelPropertyRef_pos(scope,attr) ::= 
"(<scope>->get_charPositionInLine())" +lexerRuleLabelPropertyRef_channel(scope,attr) ::= "(<scope>->get_channel())" +lexerRuleLabelPropertyRef_index(scope,attr) ::= "(<scope>->get_tokenIndex())" +lexerRuleLabelPropertyRef_text(scope,attr) ::= "(<scope>->getText())" + +// Somebody may ref $template or $tree or $stop within a rule: +rulePropertyRef_start(scope,attr) ::= "//--> rulePropertyRef_start" +rulePropertyRef_stop(scope,attr) ::= "//--> rulePropertyRef_stop" +rulePropertyRef_tree(scope,attr) ::= "//--> rulePropertyRef_tree" +rulePropertyRef_text(scope,attr) ::= "//--> rulePropertyRef_text" +rulePropertyRef_st(scope,attr) ::= "//--> rulePropertyRef_st" + +lexerRulePropertyRef_text(scope,attr) ::= "this->getText()" +lexerRulePropertyRef_type(scope,attr) ::= "_type" +lexerRulePropertyRef_line(scope,attr) ::= "this->get_state()->get_tokenStartLine()" +lexerRulePropertyRef_pos(scope,attr) ::= "this->get_state()->get_tokenStartCharPositionInLine()" +lexerRulePropertyRef_channel(scope,attr) ::= "this->get_state()->get_channel()" +lexerRulePropertyRef_start(scope,attr) ::= "this->get_state()->get_tokenStartCharIndex()" +lexerRulePropertyRef_stop(scope,attr) ::= "(this->getCharIndex()-1)" +lexerRulePropertyRef_index(scope,attr) ::= "-1" // undefined token index in lexer +lexerRulePropertyRef_int(scope,attr) ::= "(<name>ImplTraits::ConvertToInt32(<scope>->getText()))" + + +// setting $st and $tree is allowed in local rule. everything else is flagged as error +ruleSetPropertyRef_tree(scope,attr,expr) ::= "//--> ruleSetPropertyRef_tree" +ruleSetPropertyRef_st(scope,attr,expr) ::= "//--> ruleSetPropertyRef_st" + + +/** How to deal with an @after for C targets. Because we cannot rely on + * any garbage collection, after code is executed even in backtracking + * mode. Must be documented clearly. + */ +execAfter(action) ::= << +{ + <action> +} +>> + +/** How to execute an action (when not backtracking) */ +execAction(action) ::= "" + +// M I S C (properties, etc...) 
+ +bitsetDeclare(bitsetname, words64, traits) ::= << + +/** Bitset defining follow set for error recovery in rule state: <name> */ +static ANTLR_BITWORD <bitsetname>_bits[] = { <words64:{it |ANTLR_UINT64_LIT(<it>)}; separator=", "> }; +static <traits>::BitsetListType <bitsetname>( <bitsetname>_bits, <length(words64)> ); +>> + +codeFileExtension() ::= ".cpp" + +true_value() ::= "true" +false_value() ::= "false" + diff --git a/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in b/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in index c232db86dc..433d3bfce0 100644 --- a/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in +++ b/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in @@ -1,472 +1,472 @@ -/* - [The "BSD license"] - Copyright (c) 2005-2006 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** The overall file structure of a recognizer; stores methods for rules - * and cyclic DFAs plus support code. - */ -outputFile(LEXER,PARSER,TREE_PARSER, actionScope, actions, - docComment, recognizer, - name, tokens, tokenNames, rules, cyclicDFAs, - bitsets, buildTemplate, buildAST, rewriteMode, profile, - backtracking, synpreds, memoize, numRules, - fileName, ANTLRVersion, generatedTimestamp, trace, - scopes, superClass, literals) ::= << -<if(PARSER)> -// $ANTLR <ANTLRVersion> <fileName> <generatedTimestamp> -// -// Generated with: -d64 -lib ./ -language protobuf - -syntax = "proto3"; -option cc_enable_arenas = true; - -package @ANTLR_PACKAGE_NAME@; - -message TToken { - uint32 Id = 1; - uint32 Line = 2; - uint32 Column = 3; - uint32 Element = 4; - string Value = 5; -} - -<recognizer> - -message T<name>AST { - <first(rules):{r | TRule_<r.ruleDescriptor.name> Rule_<r.ruleDescriptor.name> = 1;}; separator=""> -} -<endif> ->> - -lexer(grammar, name, tokens, scopes, rules, numRules, filterMode, - labelType="CommonToken", superClass="Lexer") ::= << ->> - -parser(grammar, name, scopes, tokens, tokenNames, rules, numRules, bitsets, - ASTLabelType="Object", superClass="Parser", labelType="Token", - members={<actions.parser.members>}, - init={<actions.parser.init>} - ) ::= << -<rules; separator="\n\n"> ->> - -treeParser(grammar, name, scopes, tokens, tokenNames, globalAction, rules, - numRules, bitsets, filterMode, labelType={<ASTLabelType>}, ASTLabelType="Object", - 
superClass={<if(filterMode)><if(buildAST)>TreeRewriter<else>TreeFilter<endif><else>TreeParser<endif>}, - members={<actions.treeparser.members>}, - init={<actions.treeparser.init>} - ) ::= << ->> - -/** A simpler version of a rule template that is specific to the imaginary - * rules created for syntactic predicates. As they never have return values - * nor parameters etc..., just give simplest possible method. Don't do - * any of the normal memoization stuff in here either; it's a waste. - * As predicates cannot be inlined into the invoking rule, they need to - * be in a rule by themselves. - */ -synpredRule(ruleName, ruleDescriptor, block, description, nakedBlock) ::= "//-->> synpredRule" -synpred(name) ::= "//-->> synpred" -lexerSynpred(name) ::= "//-->> lexerSynpred" -ruleMemoization(name) ::= "//-->> ruleMemoization" -ruleBacktrackFailure() ::= "//-->> ruleBacktrackFailure" - -/** How to generate code for a rule. This includes any return type - * data aggregates required for multiple return values. - */ -rule(ruleName,ruleDescriptor,block,emptyRule,description,exceptions,finally,memoize) ::= << -// rule $ANTLR start "<ruleName>" -// <fileName>:<description> -message TRule_<ruleName> { - <block> -} -// $ANTLR end "<ruleName>" ->> - -lexerRule(ruleName,nakedBlock,ruleDescriptor,block,memoize) ::= "//-->> lexerRule" -tokensRule(ruleName,nakedBlock,args,block,ruleDescriptor) ::= "//-->> tokensRule" - -// S U B R U L E S - -/** A (...) 
subrule with multiple alternatives */ -block(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -<blockDecl()> -TBlock<elemId> Block<elemId> = <elemId>; ->> - -/** A rule block with multiple alternatives */ -ruleBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -<altDecls()> -oneof Alt { - <alts:{a | TAlt<i> Alt_<ruleName><i> = <i>; }; separator="\n"> -} ->> - -ruleBlockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -<alts> ->> - -/** A special case of a (...) subrule with a single alternative */ -blockSingleAlt ::= block - -/** A (..)* block with 1 or more alternatives */ -closureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -<blockDecl()> -repeated TBlock<elemId> Block<elemId> = <elemId>; ->> - -closureBlockSingleAlt ::= closureBlock -positiveClosureBlock ::= closureBlock -positiveClosureBlockSingleAlt ::= closureBlock - -optionalBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << -<blockDecl()> -TBlock<elemId> Block<elemId> = <elemId>; ->> - -optionalBlockSingleAlt ::= optionalBlock - -blockDecl() ::= << -message TBlock<elemId> { -<if(rest(alts))> - <altDecls()> - oneof Alt { - <alts:{a | TAlt<i> Alt<i> = <i>; }; separator="\n"> - } -<else> - <alts> -<endif> -} ->> - -altDecls() ::= << -<alts:{a | <altDecl(a,i)>}; separator="\n"> ->> - -altDecl(alt,altNum) ::= << -message TAlt<altNum> { - <alt> -} ->> - -/** An alternative is just a list of elements; at outermost level */ -alt(elements,altNum,description,autoAST,outerAlt, treeLevel,rew) ::= << -<elements:{e | <element(e, i)>}; separator="\n"> - -string Descr = <length([elements, altNum])>; ->> - -/** What to emit when there is no rewrite. For auto build - * mode, does nothing. 
- */ -noRewrite(rewriteBlockLevel, treeLevel) ::= "" - -// E L E M E N T S - -/** Dump the elements one per line */ -element(e,elemId) ::= << -<e.el> - ->> - -/** match a token optionally with a label in front */ -tokenRef(token,label,elementIndex,terminalOptions={}) ::= << -TToken Token<elemId> = <elemId>; ->> - -/** ids+=ID */ -tokenRefAndListLabel(token,label,elementIndex,terminalOptions={}) ::= << -<tokenRef(token,label,elementIndex,terminalOptions)> -<listLabel(label, label)> ->> - -listLabel(label, elem) ::= "//-->> listLabel" -charRef(char,label) ::= "//-->> charRef" -charRangeRef(a,b,label) ::= "//-->> charRangeRef" - -/** For now, sets are interval tests and must be tested inline */ -matchSet(s,label,elementIndex,postmatchCode="",terminalOptions={}) ::= << -TToken Token<elemId> = <elemId>; ->> - -matchRuleBlockSet ::= matchSet - -matchSetAndListLabel(s,label,elementIndex,postmatchCode) ::= << -<matchSet(...)> -<listLabel(label, label)> ->> - -/** Match a string literal */ -lexerStringRef(string,label,elementIndex="0") ::= "//-->> lexerStringRef" -wildcard(token,label,elementIndex,terminalOptions={}) ::= "//-->> wildcard" - -wildcardAndListLabel(token,label,elementIndex,terminalOptions={}) ::= << -<wildcard(...)> -<listLabel(label,label)> ->> - -/** Match . wildcard in lexer */ -wildcardChar(label, elementIndex) ::= "//-->> wildcardChar" - -wildcardCharListLabel(label, elementIndex) ::= << -<wildcardChar(label, elementIndex)> -<listLabel(label, label)> ->> - -/** Match a rule reference by invoking it possibly with arguments - * and a return value or values. The 'rule' argument was the - * target rule name, but now is type Rule, whose toString is - * same: the rule name. Now though you can access full rule - * descriptor stuff. 
- */ -ruleRef(rule,label,elementIndex,args,scope) ::= << -TRule_<rule.name> Rule_<rule.name><elemId> = <elemId>; ->> - -/** ids+=rule */ -ruleRefAndListLabel(rule,label,elementIndex,args,scope) ::= << -<ruleRef(rule,label,elementIndex,args,scope)> -<listLabel(label, label)> ->> - -/** A lexer rule reference - * The 'rule' argument was the target rule name, but now - * is type Rule, whose toString is same: the rule name. - * Now though you can access full rule descriptor stuff. - */ -lexerRuleRef(rule,label,args,elementIndex,scope) ::= "//-->> lexerRuleRef" - -/** i+=INT in lexer */ -lexerRuleRefAndListLabel(rule,label,args,elementIndex,scope) ::= "//-->> lexerRuleRefAndListLabel" - -/** EOF in the lexer */ -lexerMatchEOF(label,elementIndex) ::= "//-->> lexerMatchEOF" - -// used for left-recursive rules -recRuleDefArg() ::= "//-->> recRuleDefArg" -recRuleArg() ::= "//-->> recRuleArg" -recRuleAltPredicate(ruleName, opPrec) ::= "//-->> recRuleAltPredicate" -recRuleSetResultAction() ::= "//-->> recRuleSetResultAction" -recRuleSetReturnAction(src, name) ::= "//-->> recRuleSetReturnAction" - -/** match ^(root children) in tree parser */ -tree(root, actionsAfterRoot, children, nullableChildList, enclosingTreeLevel, treeLevel) ::= "//-->> tree" - -/** Every predicate is used as a validating predicate (even when it is - * also hoisted into a prediction expression). - */ -validateSemanticPredicate(pred,description) ::= "//-->> validateSemanticPredicate" - -// F i x e d D F A (if-then-else) - -dfaState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaState" - -/** Same as a normal DFA state except that we don't examine lookahead - * for the bypass alternative. It delays error detection but this - * is faster, smaller, and more what people expect. For (X)? people - * expect "if ( LA(1)==X ) match(X);" and that's it. 
- */ -dfaOptionalBlockState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaOptionalBlockState" - -/** A DFA state that is actually the loopback decision of a closure - * loop. If end-of-token (EOT) predicts any of the targets then it - * should act like a default clause (i.e., no error can be generated). - * This is used only in the lexer so that for ('a')* on the end of a rule - * anything other than 'a' predicts exiting. - */ -dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaLoopbackState" - -/** An accept state indicates a unique alternative has been predicted */ -dfaAcceptState(alt) ::= "//-->> dfaAcceptState" - -/** A simple edge with an expression. If the expression is satisfied, - * enter to the target state. To handle gated productions, we may - * have to evaluate some predicates for this edge. - */ -dfaEdge(labelExpr, targetState, predicates) ::= "//-->> dfaEdge" - -// F i x e d D F A (switch case) - -/** A DFA state where a SWITCH may be generated. The code generator - * decides if this is possible: CodeGenerator.canGenerateSwitch(). - */ -dfaStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaStateSwitch" -dfaOptionalBlockStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaOptionalBlockStateSwitch" -dfaLoopbackStateSwitch(k, edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaLoopbackStateSwitch" -dfaEdgeSwitch(labels, targetState) ::= "//-->> dfaEdgeSwitch" - -// C y c l i c D F A - -/** The code to initiate execution of a cyclic DFA; this is used - * in the rule to predict an alt just like the fixed DFA case. - * The <name> attribute is inherited via the parser, lexer, ... - */ -dfaDecision(decisionNumber,description) ::= "//-->> dfaDecision" - -/* Dump DFA tables as run-length-encoded Strings of octal values. - * Can't use hex as compiler translates them before compilation. 
- * These strings are split into multiple, concatenated strings. - * Java puts them back together at compile time thankfully. - * Java cannot handle large static arrays, so we're stuck with this - * encode/decode approach. See analysis and runtime DFA for - * the encoding methods. - */ -cyclicDFA(dfa) ::= "//-->> cyclicDFA" - -/** A state in a cyclic DFA; it's a special state and part of a big switch on - * state. - */ -cyclicDFAState(decisionNumber,stateNumber,edges,needErrorClause,semPredState) ::= "//-->> cyclicDFAState" - -/** Just like a fixed DFA edge, test the lookahead and indicate what - * state to jump to next if successful. - */ -cyclicDFAEdge(labelExpr, targetStateNumber, edgeNumber, predicates) ::= "//-->> cyclicDFAEdge" - -/** An edge pointing at end-of-token; essentially matches any char; - * always jump to the target. - */ -eotDFAEdge(targetStateNumber,edgeNumber, predicates) ::= "//-->> eotDFAEdge" - -// D F A E X P R E S S I O N S - -andPredicates(left,right) ::= "//-->> andPredicates" -orPredicates(operands) ::= "//-->> orPredicates" -notPredicate(pred) ::= "//-->> notPredicate" -evalPredicate(pred,description) ::= "//-->> evalPredicate" -evalSynPredicate(pred,description) ::= "//-->> evalSynPredicate" -lookaheadTest(atom,k,atomAsInt) ::= "//-->> lookaheadTest" - -/** Sometimes a lookahead test cannot assume that LA(k) is in a temp variable - * somewhere. Must ask for the lookahead directly. 
- */ -isolatedLookaheadTest(atom,k,atomAsInt) ::= "//-->> isolatedLookaheadTest" -lookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= "//-->> lookaheadRangeTest" -isolatedLookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= "//-->> isolatedLookaheadRangeTest" -setTest(ranges) ::= "//-->> setTest" - -// A T T R I B U T E S - -globalAttributeScopeClass(scope) ::= "//-->> globalAttributeScopeClass" -globalAttributeScopeStack(scope) ::= "//-->> globalAttributeScopeStack" -ruleAttributeScopeClass(scope) ::= "//-->> ruleAttributeScopeClass" -ruleAttributeScopeStack(scope) ::= "//-->> ruleAttributeScopeStack" -delegateName(d) ::= "//-->> delegateName" - -/** Define a rule label including default value */ -ruleLabelDef(label) ::= "//-->> ruleLabelDef" -returnStructName(r) ::= "//-->> returnStructName" - -/** Define a return struct for a rule if the code needs to access its - * start/stop tokens, tree stuff, attributes, ... Leave a hole for - * subgroups to stick in members. - */ -returnScope(scope) ::= "//-->> returnScope" -parameterScope(scope) ::= "//-->> parameterScope" -parameterAttributeRef(attr) ::= "//-->> parameterAttributeRef" -parameterSetAttributeRef(attr,expr) ::= "//-->> parameterSetAttributeRef" -scopeAttributeRef(scope,attr,index,negIndex) ::= "//-->> scopeAttributeRef" -scopeSetAttributeRef(scope,attr,expr,index,negIndex) ::= "//-->> scopeSetAttributeRef" - -/** $x is either global scope or x is rule with dynamic scope; refers - * to stack itself not top of stack. This is useful for predicates - * like {$function.size()>0 && $function::name.equals("foo")}? 
- */ -isolatedDynamicScopeRef(scope) ::= "//-->> isolatedDynamicScopeRef" - -/** reference an attribute of rule; might only have single return value */ -ruleLabelRef(referencedRule,scope,attr) ::= "//-->> ruleLabelRef" -returnAttributeRef(ruleDescriptor,attr) ::= "//-->> returnAttributeRef" -returnSetAttributeRef(ruleDescriptor,attr,expr) ::= "//-->> returnSetAttributeRef" - -/** How to translate $tokenLabel */ -tokenLabelRef(label) ::= "//-->> tokenLabelRef" - -/** ids+=ID {$ids} or e+=expr {$e} */ -listLabelRef(label) ::= "//-->> listLabelRef" - - -// not sure the next are the right approach; and they are evaluated early; -// they cannot see TREE_PARSER or PARSER attributes for example. :( - -tokenLabelPropertyRef_text(scope,attr) ::= "//-->> tokenLabelPropertyRef_text" -tokenLabelPropertyRef_type(scope,attr) ::= "//-->> tokenLabelPropertyRef_type" -tokenLabelPropertyRef_line(scope,attr) ::= "//-->> tokenLabelPropertyRef_line" -tokenLabelPropertyRef_pos(scope,attr) ::= "//-->> tokenLabelPropertyRef_pos" -tokenLabelPropertyRef_channel(scope,attr) ::= "//-->> tokenLabelPropertyRef_channel" -tokenLabelPropertyRef_index(scope,attr) ::= "//-->> tokenLabelPropertyRef_index" -tokenLabelPropertyRef_tree(scope,attr) ::= "//-->> tokenLabelPropertyRef_tree" - -ruleLabelPropertyRef_start(scope,attr) ::= "//-->> ruleLabelPropertyRef_start" -ruleLabelPropertyRef_stop(scope,attr) ::= "//-->> ruleLabelPropertyRef_stop" -ruleLabelPropertyRef_tree(scope,attr) ::= "//-->> ruleLabelPropertyRef_tree" -ruleLabelPropertyRef_text(scope,attr) ::= "//-->> ruleLabelPropertyRef_text" -ruleLabelPropertyRef_st(scope,attr) ::= "//-->> ruleLabelPropertyRef_st" - -/** Isolated $RULE ref ok in lexer as it's a Token */ -lexerRuleLabel(label) ::= "//-->> lexerRuleLabel" - -lexerRuleLabelPropertyRef_type(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_type" -lexerRuleLabelPropertyRef_line(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_line" -lexerRuleLabelPropertyRef_pos(scope,attr) ::= "//-->> 
lexerRuleLabelPropertyRef_pos" -lexerRuleLabelPropertyRef_channel(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_channel" -lexerRuleLabelPropertyRef_index(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_index" -lexerRuleLabelPropertyRef_text(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_text" -lexerRuleLabelPropertyRef_int(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_int" - -// Somebody may ref $template or $tree or $stop within a rule: -rulePropertyRef_start(scope,attr) ::= "//-->> rulePropertyRef_start" -rulePropertyRef_stop(scope,attr) ::= "//-->> rulePropertyRef_stop" -rulePropertyRef_tree(scope,attr) ::= "//-->> rulePropertyRef_tree" -rulePropertyRef_text(scope,attr) ::= "//-->> rulePropertyRef_text" -rulePropertyRef_st(scope,attr) ::= "//-->> rulePropertyRef_st" - -lexerRulePropertyRef_text(scope,attr) ::= "//-->> lexerRulePropertyRef_text" -lexerRulePropertyRef_type(scope,attr) ::= "//-->> lexerRulePropertyRef_type" -lexerRulePropertyRef_line(scope,attr) ::= "//-->> lexerRulePropertyRef_line" -lexerRulePropertyRef_pos(scope,attr) ::= "//-->> lexerRulePropertyRef_pos" -lexerRulePropertyRef_index(scope,attr) ::= "//-->> lexerRulePropertyRef_index" -lexerRulePropertyRef_channel(scope,attr) ::= "//-->> lexerRulePropertyRef_channel" -lexerRulePropertyRef_start(scope,attr) ::= "//-->> lexerRulePropertyRef_start" -lexerRulePropertyRef_stop(scope,attr) ::= "//-->> lexerRulePropertyRef_stop" -lexerRulePropertyRef_int(scope,attr) ::= "//-->> lexerRulePropertyRef_int" - -// setting $st and $tree is allowed in local rule. everything else -// is flagged as error -ruleSetPropertyRef_tree(scope,attr,expr) ::= "//-->> ruleSetPropertyRef_tree" -ruleSetPropertyRef_st(scope,attr,expr) ::= "//-->> ruleSetPropertyRef_st" - - -/** How to execute an action (only when not backtracking) */ -execAction(action) ::= "" - -/** How to always execute an action even when backtracking */ -execForcedAction(action) ::= "" - -// M I S C (properties, etc...) 
- -codeFileExtension() ::= ".proto" +/* + [The "BSD license"] + Copyright (c) 2005-2006 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** The overall file structure of a recognizer; stores methods for rules + * and cyclic DFAs plus support code. 
+ */ +outputFile(LEXER,PARSER,TREE_PARSER, actionScope, actions, + docComment, recognizer, + name, tokens, tokenNames, rules, cyclicDFAs, + bitsets, buildTemplate, buildAST, rewriteMode, profile, + backtracking, synpreds, memoize, numRules, + fileName, ANTLRVersion, generatedTimestamp, trace, + scopes, superClass, literals) ::= << +<if(PARSER)> +// $ANTLR <ANTLRVersion> <fileName> <generatedTimestamp> +// +// Generated with: -d64 -lib ./ -language protobuf + +syntax = "proto3"; +option cc_enable_arenas = true; + +package @ANTLR_PACKAGE_NAME@; + +message TToken { + uint32 Id = 1; + uint32 Line = 2; + uint32 Column = 3; + uint32 Element = 4; + string Value = 5; +} + +<recognizer> + +message T<name>AST { + <first(rules):{r | TRule_<r.ruleDescriptor.name> Rule_<r.ruleDescriptor.name> = 1;}; separator=""> +} +<endif> +>> + +lexer(grammar, name, tokens, scopes, rules, numRules, filterMode, + labelType="CommonToken", superClass="Lexer") ::= << +>> + +parser(grammar, name, scopes, tokens, tokenNames, rules, numRules, bitsets, + ASTLabelType="Object", superClass="Parser", labelType="Token", + members={<actions.parser.members>}, + init={<actions.parser.init>} + ) ::= << +<rules; separator="\n\n"> +>> + +treeParser(grammar, name, scopes, tokens, tokenNames, globalAction, rules, + numRules, bitsets, filterMode, labelType={<ASTLabelType>}, ASTLabelType="Object", + superClass={<if(filterMode)><if(buildAST)>TreeRewriter<else>TreeFilter<endif><else>TreeParser<endif>}, + members={<actions.treeparser.members>}, + init={<actions.treeparser.init>} + ) ::= << +>> + +/** A simpler version of a rule template that is specific to the imaginary + * rules created for syntactic predicates. As they never have return values + * nor parameters etc..., just give simplest possible method. Don't do + * any of the normal memoization stuff in here either; it's a waste. + * As predicates cannot be inlined into the invoking rule, they need to + * be in a rule by themselves. 
+ */ +synpredRule(ruleName, ruleDescriptor, block, description, nakedBlock) ::= "//-->> synpredRule" +synpred(name) ::= "//-->> synpred" +lexerSynpred(name) ::= "//-->> lexerSynpred" +ruleMemoization(name) ::= "//-->> ruleMemoization" +ruleBacktrackFailure() ::= "//-->> ruleBacktrackFailure" + +/** How to generate code for a rule. This includes any return type + * data aggregates required for multiple return values. + */ +rule(ruleName,ruleDescriptor,block,emptyRule,description,exceptions,finally,memoize) ::= << +// rule $ANTLR start "<ruleName>" +// <fileName>:<description> +message TRule_<ruleName> { + <block> +} +// $ANTLR end "<ruleName>" +>> + +lexerRule(ruleName,nakedBlock,ruleDescriptor,block,memoize) ::= "//-->> lexerRule" +tokensRule(ruleName,nakedBlock,args,block,ruleDescriptor) ::= "//-->> tokensRule" + +// S U B R U L E S + +/** A (...) subrule with multiple alternatives */ +block(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +<blockDecl()> +TBlock<elemId> Block<elemId> = <elemId>; +>> + +/** A rule block with multiple alternatives */ +ruleBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +<altDecls()> +oneof Alt { + <alts:{a | TAlt<i> Alt_<ruleName><i> = <i>; }; separator="\n"> +} +>> + +ruleBlockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +<alts> +>> + +/** A special case of a (...) 
subrule with a single alternative */ +blockSingleAlt ::= block + +/** A (..)* block with 1 or more alternatives */ +closureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +<blockDecl()> +repeated TBlock<elemId> Block<elemId> = <elemId>; +>> + +closureBlockSingleAlt ::= closureBlock +positiveClosureBlock ::= closureBlock +positiveClosureBlockSingleAlt ::= closureBlock + +optionalBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << +<blockDecl()> +TBlock<elemId> Block<elemId> = <elemId>; +>> + +optionalBlockSingleAlt ::= optionalBlock + +blockDecl() ::= << +message TBlock<elemId> { +<if(rest(alts))> + <altDecls()> + oneof Alt { + <alts:{a | TAlt<i> Alt<i> = <i>; }; separator="\n"> + } +<else> + <alts> +<endif> +} +>> + +altDecls() ::= << +<alts:{a | <altDecl(a,i)>}; separator="\n"> +>> + +altDecl(alt,altNum) ::= << +message TAlt<altNum> { + <alt> +} +>> + +/** An alternative is just a list of elements; at outermost level */ +alt(elements,altNum,description,autoAST,outerAlt, treeLevel,rew) ::= << +<elements:{e | <element(e, i)>}; separator="\n"> + +string Descr = <length([elements, altNum])>; +>> + +/** What to emit when there is no rewrite. For auto build + * mode, does nothing. 
+ */ +noRewrite(rewriteBlockLevel, treeLevel) ::= "" + +// E L E M E N T S + +/** Dump the elements one per line */ +element(e,elemId) ::= << +<e.el> + +>> + +/** match a token optionally with a label in front */ +tokenRef(token,label,elementIndex,terminalOptions={}) ::= << +TToken Token<elemId> = <elemId>; +>> + +/** ids+=ID */ +tokenRefAndListLabel(token,label,elementIndex,terminalOptions={}) ::= << +<tokenRef(token,label,elementIndex,terminalOptions)> +<listLabel(label, label)> +>> + +listLabel(label, elem) ::= "//-->> listLabel" +charRef(char,label) ::= "//-->> charRef" +charRangeRef(a,b,label) ::= "//-->> charRangeRef" + +/** For now, sets are interval tests and must be tested inline */ +matchSet(s,label,elementIndex,postmatchCode="",terminalOptions={}) ::= << +TToken Token<elemId> = <elemId>; +>> + +matchRuleBlockSet ::= matchSet + +matchSetAndListLabel(s,label,elementIndex,postmatchCode) ::= << +<matchSet(...)> +<listLabel(label, label)> +>> + +/** Match a string literal */ +lexerStringRef(string,label,elementIndex="0") ::= "//-->> lexerStringRef" +wildcard(token,label,elementIndex,terminalOptions={}) ::= "//-->> wildcard" + +wildcardAndListLabel(token,label,elementIndex,terminalOptions={}) ::= << +<wildcard(...)> +<listLabel(label,label)> +>> + +/** Match . wildcard in lexer */ +wildcardChar(label, elementIndex) ::= "//-->> wildcardChar" + +wildcardCharListLabel(label, elementIndex) ::= << +<wildcardChar(label, elementIndex)> +<listLabel(label, label)> +>> + +/** Match a rule reference by invoking it possibly with arguments + * and a return value or values. The 'rule' argument was the + * target rule name, but now is type Rule, whose toString is + * same: the rule name. Now though you can access full rule + * descriptor stuff. 
+ */ +ruleRef(rule,label,elementIndex,args,scope) ::= << +TRule_<rule.name> Rule_<rule.name><elemId> = <elemId>; +>> + +/** ids+=rule */ +ruleRefAndListLabel(rule,label,elementIndex,args,scope) ::= << +<ruleRef(rule,label,elementIndex,args,scope)> +<listLabel(label, label)> +>> + +/** A lexer rule reference + * The 'rule' argument was the target rule name, but now + * is type Rule, whose toString is same: the rule name. + * Now though you can access full rule descriptor stuff. + */ +lexerRuleRef(rule,label,args,elementIndex,scope) ::= "//-->> lexerRuleRef" + +/** i+=INT in lexer */ +lexerRuleRefAndListLabel(rule,label,args,elementIndex,scope) ::= "//-->> lexerRuleRefAndListLabel" + +/** EOF in the lexer */ +lexerMatchEOF(label,elementIndex) ::= "//-->> lexerMatchEOF" + +// used for left-recursive rules +recRuleDefArg() ::= "//-->> recRuleDefArg" +recRuleArg() ::= "//-->> recRuleArg" +recRuleAltPredicate(ruleName, opPrec) ::= "//-->> recRuleAltPredicate" +recRuleSetResultAction() ::= "//-->> recRuleSetResultAction" +recRuleSetReturnAction(src, name) ::= "//-->> recRuleSetReturnAction" + +/** match ^(root children) in tree parser */ +tree(root, actionsAfterRoot, children, nullableChildList, enclosingTreeLevel, treeLevel) ::= "//-->> tree" + +/** Every predicate is used as a validating predicate (even when it is + * also hoisted into a prediction expression). + */ +validateSemanticPredicate(pred,description) ::= "//-->> validateSemanticPredicate" + +// F i x e d D F A (if-then-else) + +dfaState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaState" + +/** Same as a normal DFA state except that we don't examine lookahead + * for the bypass alternative. It delays error detection but this + * is faster, smaller, and more what people expect. For (X)? people + * expect "if ( LA(1)==X ) match(X);" and that's it. 
+ */ +dfaOptionalBlockState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaOptionalBlockState" + +/** A DFA state that is actually the loopback decision of a closure + * loop. If end-of-token (EOT) predicts any of the targets then it + * should act like a default clause (i.e., no error can be generated). + * This is used only in the lexer so that for ('a')* on the end of a rule + * anything other than 'a' predicts exiting. + */ +dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaLoopbackState" + +/** An accept state indicates a unique alternative has been predicted */ +dfaAcceptState(alt) ::= "//-->> dfaAcceptState" + +/** A simple edge with an expression. If the expression is satisfied, + * enter to the target state. To handle gated productions, we may + * have to evaluate some predicates for this edge. + */ +dfaEdge(labelExpr, targetState, predicates) ::= "//-->> dfaEdge" + +// F i x e d D F A (switch case) + +/** A DFA state where a SWITCH may be generated. The code generator + * decides if this is possible: CodeGenerator.canGenerateSwitch(). + */ +dfaStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaStateSwitch" +dfaOptionalBlockStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaOptionalBlockStateSwitch" +dfaLoopbackStateSwitch(k, edges,eotPredictsAlt,description,stateNumber,semPredState) ::= "//-->> dfaLoopbackStateSwitch" +dfaEdgeSwitch(labels, targetState) ::= "//-->> dfaEdgeSwitch" + +// C y c l i c D F A + +/** The code to initiate execution of a cyclic DFA; this is used + * in the rule to predict an alt just like the fixed DFA case. + * The <name> attribute is inherited via the parser, lexer, ... + */ +dfaDecision(decisionNumber,description) ::= "//-->> dfaDecision" + +/* Dump DFA tables as run-length-encoded Strings of octal values. + * Can't use hex as compiler translates them before compilation. 
+ * These strings are split into multiple, concatenated strings. + * Java puts them back together at compile time thankfully. + * Java cannot handle large static arrays, so we're stuck with this + * encode/decode approach. See analysis and runtime DFA for + * the encoding methods. + */ +cyclicDFA(dfa) ::= "//-->> cyclicDFA" + +/** A state in a cyclic DFA; it's a special state and part of a big switch on + * state. + */ +cyclicDFAState(decisionNumber,stateNumber,edges,needErrorClause,semPredState) ::= "//-->> cyclicDFAState" + +/** Just like a fixed DFA edge, test the lookahead and indicate what + * state to jump to next if successful. + */ +cyclicDFAEdge(labelExpr, targetStateNumber, edgeNumber, predicates) ::= "//-->> cyclicDFAEdge" + +/** An edge pointing at end-of-token; essentially matches any char; + * always jump to the target. + */ +eotDFAEdge(targetStateNumber,edgeNumber, predicates) ::= "//-->> eotDFAEdge" + +// D F A E X P R E S S I O N S + +andPredicates(left,right) ::= "//-->> andPredicates" +orPredicates(operands) ::= "//-->> orPredicates" +notPredicate(pred) ::= "//-->> notPredicate" +evalPredicate(pred,description) ::= "//-->> evalPredicate" +evalSynPredicate(pred,description) ::= "//-->> evalSynPredicate" +lookaheadTest(atom,k,atomAsInt) ::= "//-->> lookaheadTest" + +/** Sometimes a lookahead test cannot assume that LA(k) is in a temp variable + * somewhere. Must ask for the lookahead directly. 
+ */ +isolatedLookaheadTest(atom,k,atomAsInt) ::= "//-->> isolatedLookaheadTest" +lookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= "//-->> lookaheadRangeTest" +isolatedLookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= "//-->> isolatedLookaheadRangeTest" +setTest(ranges) ::= "//-->> setTest" + +// A T T R I B U T E S + +globalAttributeScopeClass(scope) ::= "//-->> globalAttributeScopeClass" +globalAttributeScopeStack(scope) ::= "//-->> globalAttributeScopeStack" +ruleAttributeScopeClass(scope) ::= "//-->> ruleAttributeScopeClass" +ruleAttributeScopeStack(scope) ::= "//-->> ruleAttributeScopeStack" +delegateName(d) ::= "//-->> delegateName" + +/** Define a rule label including default value */ +ruleLabelDef(label) ::= "//-->> ruleLabelDef" +returnStructName(r) ::= "//-->> returnStructName" + +/** Define a return struct for a rule if the code needs to access its + * start/stop tokens, tree stuff, attributes, ... Leave a hole for + * subgroups to stick in members. + */ +returnScope(scope) ::= "//-->> returnScope" +parameterScope(scope) ::= "//-->> parameterScope" +parameterAttributeRef(attr) ::= "//-->> parameterAttributeRef" +parameterSetAttributeRef(attr,expr) ::= "//-->> parameterSetAttributeRef" +scopeAttributeRef(scope,attr,index,negIndex) ::= "//-->> scopeAttributeRef" +scopeSetAttributeRef(scope,attr,expr,index,negIndex) ::= "//-->> scopeSetAttributeRef" + +/** $x is either global scope or x is rule with dynamic scope; refers + * to stack itself not top of stack. This is useful for predicates + * like {$function.size()>0 && $function::name.equals("foo")}? 
+ */ +isolatedDynamicScopeRef(scope) ::= "//-->> isolatedDynamicScopeRef" + +/** reference an attribute of rule; might only have single return value */ +ruleLabelRef(referencedRule,scope,attr) ::= "//-->> ruleLabelRef" +returnAttributeRef(ruleDescriptor,attr) ::= "//-->> returnAttributeRef" +returnSetAttributeRef(ruleDescriptor,attr,expr) ::= "//-->> returnSetAttributeRef" + +/** How to translate $tokenLabel */ +tokenLabelRef(label) ::= "//-->> tokenLabelRef" + +/** ids+=ID {$ids} or e+=expr {$e} */ +listLabelRef(label) ::= "//-->> listLabelRef" + + +// not sure the next are the right approach; and they are evaluated early; +// they cannot see TREE_PARSER or PARSER attributes for example. :( + +tokenLabelPropertyRef_text(scope,attr) ::= "//-->> tokenLabelPropertyRef_text" +tokenLabelPropertyRef_type(scope,attr) ::= "//-->> tokenLabelPropertyRef_type" +tokenLabelPropertyRef_line(scope,attr) ::= "//-->> tokenLabelPropertyRef_line" +tokenLabelPropertyRef_pos(scope,attr) ::= "//-->> tokenLabelPropertyRef_pos" +tokenLabelPropertyRef_channel(scope,attr) ::= "//-->> tokenLabelPropertyRef_channel" +tokenLabelPropertyRef_index(scope,attr) ::= "//-->> tokenLabelPropertyRef_index" +tokenLabelPropertyRef_tree(scope,attr) ::= "//-->> tokenLabelPropertyRef_tree" + +ruleLabelPropertyRef_start(scope,attr) ::= "//-->> ruleLabelPropertyRef_start" +ruleLabelPropertyRef_stop(scope,attr) ::= "//-->> ruleLabelPropertyRef_stop" +ruleLabelPropertyRef_tree(scope,attr) ::= "//-->> ruleLabelPropertyRef_tree" +ruleLabelPropertyRef_text(scope,attr) ::= "//-->> ruleLabelPropertyRef_text" +ruleLabelPropertyRef_st(scope,attr) ::= "//-->> ruleLabelPropertyRef_st" + +/** Isolated $RULE ref ok in lexer as it's a Token */ +lexerRuleLabel(label) ::= "//-->> lexerRuleLabel" + +lexerRuleLabelPropertyRef_type(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_type" +lexerRuleLabelPropertyRef_line(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_line" +lexerRuleLabelPropertyRef_pos(scope,attr) ::= "//-->> 
lexerRuleLabelPropertyRef_pos" +lexerRuleLabelPropertyRef_channel(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_channel" +lexerRuleLabelPropertyRef_index(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_index" +lexerRuleLabelPropertyRef_text(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_text" +lexerRuleLabelPropertyRef_int(scope,attr) ::= "//-->> lexerRuleLabelPropertyRef_int" + +// Somebody may ref $template or $tree or $stop within a rule: +rulePropertyRef_start(scope,attr) ::= "//-->> rulePropertyRef_start" +rulePropertyRef_stop(scope,attr) ::= "//-->> rulePropertyRef_stop" +rulePropertyRef_tree(scope,attr) ::= "//-->> rulePropertyRef_tree" +rulePropertyRef_text(scope,attr) ::= "//-->> rulePropertyRef_text" +rulePropertyRef_st(scope,attr) ::= "//-->> rulePropertyRef_st" + +lexerRulePropertyRef_text(scope,attr) ::= "//-->> lexerRulePropertyRef_text" +lexerRulePropertyRef_type(scope,attr) ::= "//-->> lexerRulePropertyRef_type" +lexerRulePropertyRef_line(scope,attr) ::= "//-->> lexerRulePropertyRef_line" +lexerRulePropertyRef_pos(scope,attr) ::= "//-->> lexerRulePropertyRef_pos" +lexerRulePropertyRef_index(scope,attr) ::= "//-->> lexerRulePropertyRef_index" +lexerRulePropertyRef_channel(scope,attr) ::= "//-->> lexerRulePropertyRef_channel" +lexerRulePropertyRef_start(scope,attr) ::= "//-->> lexerRulePropertyRef_start" +lexerRulePropertyRef_stop(scope,attr) ::= "//-->> lexerRulePropertyRef_stop" +lexerRulePropertyRef_int(scope,attr) ::= "//-->> lexerRulePropertyRef_int" + +// setting $st and $tree is allowed in local rule. everything else +// is flagged as error +ruleSetPropertyRef_tree(scope,attr,expr) ::= "//-->> ruleSetPropertyRef_tree" +ruleSetPropertyRef_st(scope,attr,expr) ::= "//-->> ruleSetPropertyRef_st" + + +/** How to execute an action (only when not backtracking) */ +execAction(action) ::= "" + +/** How to always execute an action even when backtracking */ +execForcedAction(action) ::= "" + +// M I S C (properties, etc...) 
+ +codeFileExtension() ::= ".proto" diff --git a/ydb/library/yql/providers/common/codec/yql_codec.cpp b/ydb/library/yql/providers/common/codec/yql_codec.cpp index 7587e188fa..414d715e26 100644 --- a/ydb/library/yql/providers/common/codec/yql_codec.cpp +++ b/ydb/library/yql/providers/common/codec/yql_codec.cpp @@ -92,7 +92,7 @@ void WriteYsonValueImpl(TYsonResultWriter& writer, const NUdf::TUnboxedValuePod& case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TUuid>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: writer.OnStringScalar(value.AsStringRef()); return; @@ -120,8 +120,8 @@ void WriteYsonValueImpl(TYsonResultWriter& writer, const NUdf::TUnboxedValuePod& return; case NUdf::TDataType<NUdf::TTzDate>::Id: case NUdf::TDataType<NUdf::TTzDatetime>::Id: - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { const NUdf::TUnboxedValue out(ValueToString(*dataType->GetDataSlot(), value)); writer.OnUtf8StringScalar(out.AsStringRef()); return; @@ -352,10 +352,10 @@ NYT::TNode DataValueToNode(const NKikimr::NUdf::TUnboxedValuePod& value, NKikimr const auto params = static_cast<NKikimr::NMiniKQL::TDataDecimalType*>(type)->GetParams(); return NYT::TNode(NDecimal::ToString(value.GetInt128(), params.first, params.second)); } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - return NYT::TNode(ToString(TStringBuf(value.AsStringRef()))); - } + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + return NYT::TNode(ToString(TStringBuf(value.AsStringRef()))); + } } YQL_ENSURE(false, "Unsupported type: " << static_cast<int>(dataType->GetSchemeType())); } @@ -459,10 +459,10 @@ TString DataValueToString(const 
NKikimr::NUdf::TUnboxedValuePod& value, const TD out << value.Get<ui64>() << "," << NKikimr::NMiniKQL::GetTimezoneIANAName(value.GetTimezoneId()); return out.Str(); } - case NUdf::EDataSlot::JsonDocument: { - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - return ToString(TStringBuf(value.AsStringRef())); - } + case NUdf::EDataSlot::JsonDocument: { + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + return ToString(TStringBuf(value.AsStringRef())); + } } Y_FAIL("Unexpected"); @@ -848,8 +848,8 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, case NUdf::TDataType<NUdf::TUtf8>::Id: case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { if (isTableFormat) { auto nextString = ReadNextString(cmd, buf); return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(nextString))); @@ -965,15 +965,15 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, return data; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - if (isTableFormat) { - return ValueFromString(EDataSlot::JsonDocument, ReadNextString(cmd, buf)); - } - - const auto json = ReadYsonStringInResultFormat(cmd, buf); - return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); - } - + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + if (isTableFormat) { + return ValueFromString(EDataSlot::JsonDocument, ReadNextString(cmd, buf)); + } + + const auto json = ReadYsonStringInResultFormat(cmd, buf); + return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); + } + default: YQL_ENSURE(false, "Unsupported data type: " << schemeType); } @@ -1389,8 +1389,8 @@ NUdf::TUnboxedValue ReadSkiffData(TType* type, ui64 nativeYtTypeFlags, TInputBuf case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TJson>::Id: case NUdf::TDataType<NUdf::TYson>::Id: - case 
NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { ui32 size; buf.ReadMany((char*)&size, sizeof(size)); CHECK_STRING_LENGTH_UNSIGNED(size); @@ -1475,15 +1475,15 @@ NUdf::TUnboxedValue ReadSkiffData(TType* type, ui64 nativeYtTypeFlags, TInputBuf return data; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto json = NUdf::TUnboxedValue(MakeStringNotFilled(size)); - buf.ReadMany(json.AsStringRef().Data(), size); - return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); - } - + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto json = NUdf::TUnboxedValue(MakeStringNotFilled(size)); + buf.ReadMany(json.AsStringRef().Data(), size); + return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); + } + default: YQL_ENSURE(false, "Unsupported data type: " << schemeType); } @@ -1780,8 +1780,8 @@ void WriteYsonValueInTableFormat(TOutputBuf& buf, TType* type, const NUdf::TUnbo case NUdf::TDataType<NUdf::TUtf8>::Id: case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { buf.Write(StringMarker); auto str = value.AsStringRef(); buf.WriteVarI32(str.Size()); @@ -1857,15 +1857,15 @@ void WriteYsonValueInTableFormat(TOutputBuf& buf, TType* type, const NUdf::TUnbo break; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - buf.Write(StringMarker); - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - auto str = json.AsStringRef(); - buf.WriteVarI32(str.Size()); - buf.WriteMany(str); - break; - } - + case 
NUdf::TDataType<NUdf::TJsonDocument>::Id: { + buf.Write(StringMarker); + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + auto str = json.AsStringRef(); + buf.WriteVarI32(str.Size()); + buf.WriteMany(str); + break; + } + default: YQL_ENSURE(false, "Unsupported data type: " << schemeType); } @@ -2066,8 +2066,8 @@ void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, cons case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TJson>::Id: case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { auto str = value.AsStringRef(); ui32 size = str.Size(); buf.WriteMany((const char*)&size, sizeof(size)); @@ -2129,15 +2129,15 @@ void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, cons break; } - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - auto str = json.AsStringRef(); - ui32 size = str.Size(); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany(str); - break; - } - + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + auto str = json.AsStringRef(); + ui32 size = str.Size(); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany(str); + break; + } + default: YQL_ENSURE(false, "Unsupported data type: " << schemeType); } diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp index dd1f162d78..a0d69a8115 100644 --- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp +++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp @@ -890,15 +890,15 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return 
ctx.ProgramBuilder.NewDataLiteral<NUdf::EDataSlot::Json>(node.Head().Content()); }); - AddCallable("JsonDocument", [](const TExprNode& node, TMkqlBuildContext& ctx) { - // NOTE: ValueFromString returns TUnboxedValuePod. This type does not free string inside it during destruction. - // To get smart pointer-like behaviour we convert TUnboxedValuePod to TUnboxedValue. Without this conversion there - // will be a memory leak. - NUdf::TUnboxedValue jsonDocument = ValueFromString(NUdf::EDataSlot::JsonDocument, node.Head().Content()); - MKQL_ENSURE(bool(jsonDocument), "Invalid JsonDocument literal"); - return ctx.ProgramBuilder.NewDataLiteral<NUdf::EDataSlot::JsonDocument>(jsonDocument.AsStringRef()); - }); - + AddCallable("JsonDocument", [](const TExprNode& node, TMkqlBuildContext& ctx) { + // NOTE: ValueFromString returns TUnboxedValuePod. This type does not free string inside it during destruction. + // To get smart pointer-like behaviour we convert TUnboxedValuePod to TUnboxedValue. Without this conversion there + // will be a memory leak. 
+ NUdf::TUnboxedValue jsonDocument = ValueFromString(NUdf::EDataSlot::JsonDocument, node.Head().Content()); + MKQL_ENSURE(bool(jsonDocument), "Invalid JsonDocument literal"); + return ctx.ProgramBuilder.NewDataLiteral<NUdf::EDataSlot::JsonDocument>(jsonDocument.AsStringRef()); + }); + AddCallable("Uuid", [](const TExprNode& node, TMkqlBuildContext& ctx) { return ctx.ProgramBuilder.NewDataLiteral<NUdf::EDataSlot::Uuid>(node.Head().Content()); }); diff --git a/ydb/library/yql/providers/common/schema/skiff/yql_skiff_schema.cpp b/ydb/library/yql/providers/common/schema/skiff/yql_skiff_schema.cpp index 2461a17094..604b01def5 100644 --- a/ydb/library/yql/providers/common/schema/skiff/yql_skiff_schema.cpp +++ b/ydb/library/yql/providers/common/schema/skiff/yql_skiff_schema.cpp @@ -65,7 +65,7 @@ struct TSkiffTypeLoader { case NUdf::EDataSlot::Json: case NUdf::EDataSlot::Uuid: case NUdf::EDataSlot::DyNumber: - case NUdf::EDataSlot::JsonDocument: + case NUdf::EDataSlot::JsonDocument: return NYT::TNode()("wire_type", "string32"); case NUdf::EDataSlot::Yson: return NYT::TNode()("wire_type", "yson32"); diff --git a/ydb/library/yql/providers/config/yql_config_provider.cpp b/ydb/library/yql/providers/config/yql_config_provider.cpp index 6952e96cf0..0b177775e4 100644 --- a/ydb/library/yql/providers/config/yql_config_provider.cpp +++ b/ydb/library/yql/providers/config/yql_config_provider.cpp @@ -718,15 +718,15 @@ namespace { .emplace( TUserDataKey::File(TStringBuf("/home/geodata6.bin")), TUserDataBlock{EUserDataType::URL, {}, TString(args[0]), {}, {}}).first->second.Usage.Set(EUserDataBlockUsage::Path); - } - else if (name == "JsonQueryReturnsJsonDocument" || name == "DisableJsonQueryReturnsJsonDocument") { - if (args.size() != 0) { - ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); - return false; - } - - Types.JsonQueryReturnsJsonDocument = (name == "DisableJsonQueryReturnsJsonDocument"); - } + } + else if (name == 
"JsonQueryReturnsJsonDocument" || name == "DisableJsonQueryReturnsJsonDocument") { + if (args.size() != 0) { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); + return false; + } + + Types.JsonQueryReturnsJsonDocument = (name == "DisableJsonQueryReturnsJsonDocument"); + } else if (name == "OrderedColumns" || name == "DisableOrderedColumns") { if (args.size() != 0) { ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); @@ -745,7 +745,7 @@ namespace { return false; } } - else { + else { ctx.AddError(TIssue(pos, TStringBuilder() << "Unsupported command: " << name)); return false; } diff --git a/ydb/library/yql/public/types/yql_types.proto b/ydb/library/yql/public/types/yql_types.proto index f9ebaf1390..c01647e938 100644 --- a/ydb/library/yql/public/types/yql_types.proto +++ b/ydb/library/yql/public/types/yql_types.proto @@ -20,7 +20,7 @@ enum TypeIds { Yson = 0x1201; Json = 0x1202; Uuid = 0x1203; - JsonDocument = 0x1204; + JsonDocument = 0x1204; Date = 0x0030; Datetime = 0x0031; Timestamp = 0x0032; @@ -29,5 +29,5 @@ enum TypeIds { TzDatetime = 0x0035; TzTimestamp = 0x0036; Decimal = 0x1301; - DyNumber = 0x1302; + DyNumber = 0x1302; } diff --git a/ydb/library/yql/public/udf/udf_type_ops.h b/ydb/library/yql/public/udf/udf_type_ops.h index 4b2446c8fd..0cbd751e14 100644 --- a/ydb/library/yql/public/udf/udf_type_ops.h +++ b/ydb/library/yql/public/udf/udf_type_ops.h @@ -140,11 +140,11 @@ inline THashType GetValueHash<EDataSlot::Json>(const TUnboxedValuePod&) { } template <> -inline THashType GetValueHash<EDataSlot::JsonDocument>(const TUnboxedValuePod&) { - Y_FAIL("JsonDocument isn't hashable."); -} - -template <> +inline THashType GetValueHash<EDataSlot::JsonDocument>(const TUnboxedValuePod&) { + Y_FAIL("JsonDocument isn't hashable."); +} + +template <> inline THashType GetValueHash<EDataSlot::Date>(const TUnboxedValuePod& value) { return GetIntegerHash<ui16>(value); } @@ -331,11 
+331,11 @@ inline int CompareValues<EDataSlot::Json>(const TUnboxedValuePod&, const TUnboxe } template <> -inline int CompareValues<EDataSlot::JsonDocument>(const TUnboxedValuePod&, const TUnboxedValuePod&) { - Y_FAIL("JsonDocument isn't comparable."); -} - -template <> +inline int CompareValues<EDataSlot::JsonDocument>(const TUnboxedValuePod&, const TUnboxedValuePod&) { + Y_FAIL("JsonDocument isn't comparable."); +} + +template <> inline int CompareValues<EDataSlot::Date>(const TUnboxedValuePod& lhs, const TUnboxedValuePod& rhs) { return CompareIntegers<ui16>(lhs, rhs); } @@ -501,11 +501,11 @@ inline bool EquateValues<EDataSlot::Json>(const TUnboxedValuePod&, const TUnboxe } template <> -inline bool EquateValues<EDataSlot::JsonDocument>(const TUnboxedValuePod&, const TUnboxedValuePod&) { - Y_FAIL("JsonDocument isn't comparable."); -} - -template <> +inline bool EquateValues<EDataSlot::JsonDocument>(const TUnboxedValuePod&, const TUnboxedValuePod&) { + Y_FAIL("JsonDocument isn't comparable."); +} + +template <> inline bool EquateValues<EDataSlot::Date>(const TUnboxedValuePod& lhs, const TUnboxedValuePod& rhs) { return EquateIntegers<ui16>(lhs, rhs); } diff --git a/ydb/library/yql/sql/v0/aggregation.cpp b/ydb/library/yql/sql/v0/aggregation.cpp index 6440604881..28e7eb416a 100644 --- a/ydb/library/yql/sql/v0/aggregation.cpp +++ b/ydb/library/yql/sql/v0/aggregation.cpp @@ -381,7 +381,7 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 2; + ui32 adjustArgsCount = isFactory ? 0 : 2; if (exprs.size() != adjustArgsCount) { ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " << adjustArgsCount << " arguments, given: " << exprs.size(); @@ -1048,9 +1048,9 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 1; - ui32 minArgs = (0 + adjustArgsCount); - ui32 maxArgs = (1 + adjustArgsCount); + ui32 adjustArgsCount = isFactory ? 0 : 1; + ui32 minArgs = (0 + adjustArgsCount); + ui32 maxArgs = (1 + adjustArgsCount); if (exprs.size() < minArgs || exprs.size() > maxArgs) { ctx.Error(Pos) << "List aggregation " << (isFactory ? "factory " : "") << "function require " << minArgs << " or " << maxArgs << " arguments, given: " << exprs.size(); @@ -1112,7 +1112,7 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 1; + ui32 adjustArgsCount = isFactory ? 0 : 1; if (exprs.size() < (3 + adjustArgsCount) || exprs.size() > (7 + adjustArgsCount)) { ctx.Error(Pos) << "User defined aggregation function " << (isFactory ? 
"factory " : "") << " requires " << (3 + adjustArgsCount) << " to " << (7 + adjustArgsCount) << " arguments, given: " << exprs.size(); diff --git a/ydb/library/yql/sql/v0/builtin.cpp b/ydb/library/yql/sql/v0/builtin.cpp index a0945f9d05..d1382a78d7 100644 --- a/ydb/library/yql/sql/v0/builtin.cpp +++ b/ydb/library/yql/sql/v0/builtin.cpp @@ -310,8 +310,8 @@ public: ctx.Warning(Pos, TIssuesIds::YQL_DEPRECATED_INTERVAL_CONSTANT) << "Time prefix 'T' at end of interval constant"; } break; - default: - Y_FAIL("Unexpected data slot"); + default: + Y_FAIL("Unexpected data slot"); } if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) { @@ -1584,9 +1584,9 @@ class THoppingTime final: public TAstListNode { public: THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {}) : TAstListNode(pos) - { - Y_UNUSED(args); - } + { + Y_UNUSED(args); + } private: TNodePtr DoClone() const override { diff --git a/ydb/library/yql/sql/v0/context.cpp b/ydb/library/yql/sql/v0/context.cpp index a3a9013f2b..db80ce3587 100644 --- a/ydb/library/yql/sql/v0/context.cpp +++ b/ydb/library/yql/sql/v0/context.cpp @@ -18,7 +18,7 @@ namespace NSQLTranslationV0 { namespace { TNodePtr AddTablePathPrefix(TContext &ctx, TStringBuf prefixPath, const TDeferredAtom& path) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (prefixPath.empty()) { return path.Build(); } diff --git a/ydb/library/yql/sql/v0/insert.cpp b/ydb/library/yql/sql/v0/insert.cpp index 1d8aec7134..38044742b5 100644 --- a/ydb/library/yql/sql/v0/insert.cpp +++ b/ydb/library/yql/sql/v0/insert.cpp @@ -84,7 +84,7 @@ public: } TNodePtr Build(TContext& ctx) override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); YQL_ENSURE(Values.size() == ColumnsHint.size()); auto structObj = Y("AsStruct"); @@ -120,7 +120,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) override { - Y_UNUSED(src); + Y_UNUSED(src); bool hasError = false; for (const auto& row: Values) { if (ColumnsHint.empty()) { @@ -144,7 +144,7 @@ public: } TNodePtr Build(TContext& ctx) override { - 
Y_UNUSED(ctx); + Y_UNUSED(ctx); auto tuple = Y(); for (const auto& row: Values) { auto rowValues = Y("AsStruct"); diff --git a/ydb/library/yql/sql/v0/join.cpp b/ydb/library/yql/sql/v0/join.cpp index 78f5affb49..119b0af56f 100644 --- a/ydb/library/yql/sql/v0/join.cpp +++ b/ydb/library/yql/sql/v0/join.cpp @@ -568,7 +568,7 @@ private: const TVector<std::pair<TJoinDescr::TFullColumn, TJoinDescr::TFullColumn>>& keys, bool left ) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); auto res = Y(); for (auto& it: keys) { auto tableName = Sources[left ? it.first.Source : it.second.Source]->GetLabel(); diff --git a/ydb/library/yql/sql/v0/list_builtin.cpp b/ydb/library/yql/sql/v0/list_builtin.cpp index 64b016610d..7125799a38 100644 --- a/ydb/library/yql/sql/v0/list_builtin.cpp +++ b/ydb/library/yql/sql/v0/list_builtin.cpp @@ -98,7 +98,7 @@ TNodePtr TListProcessBuiltin::PrepareResult() { result = Y("Apply", Args[1], "item"); } - for (size_t i = 0; i < Args.size(); ++i) { + for (size_t i = 0; i < Args.size(); ++i) { if (i > 1) { result->Add(Args[i]); } diff --git a/ydb/library/yql/sql/v0/node.cpp b/ydb/library/yql/sql/v0/node.cpp index 57f0c6a561..fb31a63cbc 100644 --- a/ydb/library/yql/sql/v0/node.cpp +++ b/ydb/library/yql/sql/v0/node.cpp @@ -243,7 +243,7 @@ TAggregationPtr INode::GetAggregation() const { } INode::TPtr INode::WindowSpecFunc(const TPtr& type) const { - Y_UNUSED(type); + Y_UNUSED(type); return {}; } @@ -486,22 +486,22 @@ TString TCallNode::GetCallExplain() const { if (derivedName != OpName) { sb << ", converted to " << OpName << "()"; } - return std::move(sb); + return std::move(sb); } bool TCallNode::ValidateArguments(TContext& ctx) const { - const auto argsCount = static_cast<i32>(Args.size()); - if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) { + const auto argsCount = static_cast<i32>(Args.size()); + if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) { ctx.Error(Pos) << GetCallExplain() << " requires exactly " << MinArgs << " arguments, given: " 
<< Args.size(); return false; } - if (MinArgs >= 0 && argsCount < MinArgs) { + if (MinArgs >= 0 && argsCount < MinArgs) { ctx.Error(Pos) << GetCallExplain() << " requires at least " << MinArgs << " arguments, given: " << Args.size(); return false; } - if (MaxArgs >= 0 && argsCount > MaxArgs) { + if (MaxArgs >= 0 && argsCount > MaxArgs) { ctx.Error(Pos) << GetCallExplain() << " requires at most " << MaxArgs << " arguments, given: " << Args.size(); return false; } @@ -1159,11 +1159,11 @@ TString ISource::MakeLocalName(const TString& name) { TStringBuilder str; str << name << iter->second; ++iter->second; - return std::move(str); + return std::move(str); } bool ISource::AddAggregation(TContext& ctx, TAggregationPtr aggr) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); Aggregations.push_back(aggr); return true; } @@ -1177,8 +1177,8 @@ void ISource::AddWindowSpecs(TWinSpecs winSpecs) { } bool ISource::AddFuncOverWindow(TContext& ctx, TNodePtr expr) { - Y_UNUSED(ctx); - Y_UNUSED(expr); + Y_UNUSED(ctx); + Y_UNUSED(expr); return false; } @@ -1347,8 +1347,8 @@ bool ISource::SetSamplingOptions(TContext& ctx, } bool ISource::CalculateGroupingHint(TContext& ctx, const TVector<TString>& columns, ui64& hint) const { - Y_UNUSED(columns); - Y_UNUSED(hint); + Y_UNUSED(columns); + Y_UNUSED(hint); ctx.Error() << "Source not support grouping hint"; return false; } @@ -1503,7 +1503,7 @@ public: return iter != WindowMap.end() ? 
iter->second : 0; } size_t CreateWindowBySpec(const TString& windowName, const TWindowSpecificationPtr& winSpec) { - Y_UNUSED(windowName); + Y_UNUSED(windowName); auto curPartitions = winSpec->Partitions; auto curOrderBy = winSpec->OrderBy; auto partition = std::find_if(Partitions.begin(), Partitions.end(), [&curPartitions, &curOrderBy](const TWinPartition& other) { @@ -1559,7 +1559,7 @@ public: return Evals[frameId-1]; } TNodePtr BuildFrame(TPosition pos, size_t frameId) { - Y_UNUSED(frameId); + Y_UNUSED(frameId); /// \todo support not default frame return BuildLiteralVoid(pos); } @@ -1639,8 +1639,8 @@ TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label, const } TNodePtr ISource::BuildSort(TContext& ctx, const TString& label) { - Y_UNUSED(ctx); - Y_UNUSED(label); + Y_UNUSED(ctx); + Y_UNUSED(label); return nullptr; } @@ -1936,7 +1936,7 @@ TNodePtr TLiteralNumberNode<T>::DoClone() const { template<typename T> bool TLiteralNumberNode<T>::DoInit(TContext& ctx, ISource* src) { - Y_UNUSED(src); + Y_UNUSED(src); T val; if (!TryFromString(Value, val)) { ctx.Error(Pos) << "Failed to convert string: " << Value << " to " << Type << " value"; @@ -2132,13 +2132,13 @@ TArgPlaceholderNode::TArgPlaceholderNode(TPosition pos, const TString &name) : } bool TArgPlaceholderNode::DoInit(TContext& ctx, ISource* src) { - Y_UNUSED(src); + Y_UNUSED(src); ctx.Error(Pos) << Name << " can't be used as a part of expression."; return false; } TAstNode* TArgPlaceholderNode::Translate(TContext& ctx) const { - Y_UNUSED(ctx); + Y_UNUSED(ctx); return nullptr; } @@ -2497,7 +2497,7 @@ bool TCastNode<true>::DoInit(TContext& ctx, ISource* src) { } TNodePtr BuildCast(TContext& ctx, TPosition pos, TNodePtr expr, const TString& typeName, const TString& paramOne, const TString& paramTwo) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (!expr) { return nullptr; } @@ -2505,7 +2505,7 @@ TNodePtr BuildCast(TContext& ctx, TPosition pos, TNodePtr expr, const TString& t } TNodePtr 
BuildBitCast(TContext& ctx, TPosition pos, TNodePtr expr, const TString& typeName, const TString& paramOne, const TString& paramTwo) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (!expr) { return nullptr; } @@ -2717,7 +2717,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) final { - Y_UNUSED(src); + Y_UNUSED(src); ctx.PushBlockShortcuts(); if (!Node->Init(ctx, FakeSource.Get())) { return false; diff --git a/ydb/library/yql/sql/v0/node.h b/ydb/library/yql/sql/v0/node.h index 4c7cfc273b..b690b1d41f 100644 --- a/ydb/library/yql/sql/v0/node.h +++ b/ydb/library/yql/sql/v0/node.h @@ -361,7 +361,7 @@ namespace NSQLTranslationV0 { protected: void DoUpdateState() const override; bool DoInit(TContext& ctx, ISource* src) override; - TPtr WindowSpecFunc(const TNodePtr& type) const override; + TPtr WindowSpecFunc(const TNodePtr& type) const override; public: TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); protected: diff --git a/ydb/library/yql/sql/v0/query.cpp b/ydb/library/yql/sql/v0/query.cpp index 7752b28b01..cb7ee4fbb2 100644 --- a/ydb/library/yql/sql/v0/query.cpp +++ b/ydb/library/yql/sql/v0/query.cpp @@ -146,8 +146,8 @@ public: func == "regexp" || func == "regexp_strict" || func == "filter" || func == "filter_strict") { bool isRange = func.StartsWith("range"); bool isFilter = func.StartsWith("filter"); - size_t minArgs = isRange ? 1 : 2; - size_t maxArgs = isRange ? 5 : 4; + size_t minArgs = isRange ? 1 : 2; + size_t maxArgs = isRange ? 
5 : 4; if (Args.size() < minArgs || Args.size() > maxArgs) { ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but got: " << Args.size(); return nullptr; @@ -273,8 +273,8 @@ public: return each; } else if (func == "folder") { - size_t minArgs = 1; - size_t maxArgs = 2; + size_t minArgs = 1; + size_t maxArgs = 2; if (Args.size() < minArgs || Args.size() > maxArgs) { ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but found: " << Args.size(); return nullptr; @@ -324,7 +324,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) override { - Y_UNUSED(src); + Y_UNUSED(src); TSet<TString> used; for (auto& hint: Hints) { TMaybe<TIssue> normalizeError = NormalizeName(Pos, hint); @@ -860,8 +860,8 @@ public: : TClustersSinkOperationBase(pos, clusters) {} TPtr ProduceOperation(TContext& ctx, const TString& sinkName, const TString& service) override { - Y_UNUSED(ctx); - Y_UNUSED(service); + Y_UNUSED(ctx); + Y_UNUSED(service); return Y("let", "world", Y(TString(CommitName), "world", sinkName)); } }; @@ -1076,7 +1076,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) override { - Y_UNUSED(src); + Y_UNUSED(src); TString serviceName; TString cluster; if (std::find(Providers.cbegin(), Providers.cend(), Prefix) != Providers.cend()) { diff --git a/ydb/library/yql/sql/v0/select.cpp b/ydb/library/yql/sql/v0/select.cpp index ef47023be1..9689f900d6 100644 --- a/ydb/library/yql/sql/v0/select.cpp +++ b/ydb/library/yql/sql/v0/select.cpp @@ -187,7 +187,7 @@ public: } bool IsGroupByColumn(const TString& column) const override { - Y_UNUSED(column); + Y_UNUSED(column); return false; } @@ -227,7 +227,7 @@ public: } TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (UseAllColumns) { return true; } @@ -596,7 +596,7 @@ public: ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override { - Y_UNUSED(pos); + Y_UNUSED(pos); TString modeName; if 
(!samplingSeed) { samplingSeed = Y("Int32", Q("0")); @@ -1066,7 +1066,7 @@ public: , Terms(terms) , Without(without) , Distinct(distinct) - , HoppingWindowSpec(hoppingWindowSpec) + , HoppingWindowSpec(hoppingWindowSpec) , Stream(stream) , Settings(settings) { @@ -1835,7 +1835,7 @@ public: private: TNodePtr BuildColumnsTerms(TContext& ctx) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); TNodePtr terms; Y_VERIFY_DEBUG(Terms.size() == 1); if (Columns.All) { @@ -1913,7 +1913,7 @@ public: } bool CalculateGroupingHint(TContext& ctx, const TVector<TString>& columns, ui64& hint) const override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); hint = 0; if (GroupByColumns.empty()) { for (const auto& groupByNode: GroupBy) { @@ -2121,7 +2121,7 @@ public: } TNodePtr Build(TContext& ctx) override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); Y_FAIL("Unexpected call"); } diff --git a/ydb/library/yql/sql/v0/sql.cpp b/ydb/library/yql/sql/v0/sql.cpp index b028fc0439..84ba27ccd0 100644 --- a/ydb/library/yql/sql/v0/sql.cpp +++ b/ydb/library/yql/sql/v0/sql.cpp @@ -40,7 +40,7 @@ using NALP::SQLLexerTokens; TMutex SanitizerSQLTranslationMutex; #endif -using namespace NSQLGenerated; +using namespace NSQLGenerated; static TPosition GetPos(const TToken& token) { return TPosition(token.GetColumn(), token.GetLine()); @@ -410,8 +410,8 @@ static TVector<TString> TableHintsImpl(const TRule_table_hints& node, TTranslati PureColumnListStr(block.GetAlt2().GetRule_pure_column_list1(), ctx, hints); break; } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } return hints; } @@ -943,8 +943,8 @@ TTableRef TSqlTranslation::TableRefImpl(const TRule_table_ref& node) { tr.Keys = BuildTableKeys(pos, cluster, func, args); break; } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } TVector<TString> hints = 
GetContextHints(Ctx); if (node.HasBlock3()) { @@ -1236,7 +1236,7 @@ bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& s value = 0; const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size()); for (const char& cur: digString) { - const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)]; + const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)]; if (curDigit >= base) { ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur << "' is out of base: " << base; @@ -1380,8 +1380,8 @@ TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node) { ids.push_back(expr); break; } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } bool isLookup = false; for (auto& b: node.GetBlock2()) { @@ -1423,8 +1423,8 @@ TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node) { ids.push_back(IdOrString(bb.GetAlt3().GetRule_id_or_string1(), *this)); break; } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } for (auto& b: dotBlock.GetBlock3()) { auto expr = KeyExpr(b.GetRule_key_expr1()); @@ -1801,8 +1801,8 @@ TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node) { Ctx.IncrementMonCounter("sql_unary_operations", opName); return BuildUnaryOp(pos, opName, UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2())); } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } return nullptr; } @@ -1931,7 +1931,7 @@ TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node) { bool madeIncrement = false; for (i64 i = lowerBound.size() - 1; i >=0 ; --i) { - if (!madeIncrement) { + if (!madeIncrement) { 
upperBound.append(lowerBound[i] + 1); madeIncrement = true; } else { @@ -3565,8 +3565,8 @@ bool TGroupByClause::GroupingElement(const TRule_grouping_element& node) { } break; } - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } return true; } @@ -3816,8 +3816,8 @@ TSourcePtr TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& sele case TRule_select_kind_TBlock2::kAlt3: res = SelectCore(node.GetBlock2().GetAlt3().GetRule_select_core1(), settings, selectPos); break; - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } return res; @@ -4012,8 +4012,8 @@ TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) { case TRule_into_table_stmt_TBlock1::AltCase::kAlt6: modeTokens = {modeBlock.GetAlt6().GetToken1()}; break; - default: - Y_FAIL("You should change implementation according grammar changes"); + default: + Y_FAIL("You should change implementation according grammar changes"); } TVector<TString> modeStrings; @@ -4146,7 +4146,7 @@ TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) { bool TSqlIntoTable::ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table, ESQLWriteColumnMode mode, const TPosition& pos) { - Y_UNUSED(node); + Y_UNUSED(node); if (!table.Check(Ctx)) { return false; } diff --git a/ydb/library/yql/sql/v1/SQLv1.g.in b/ydb/library/yql/sql/v1/SQLv1.g.in index ef394c7a00..a401cfb899 100644 --- a/ydb/library/yql/sql/v1/SQLv1.g.in +++ b/ydb/library/yql/sql/v1/SQLv1.g.in @@ -91,14 +91,14 @@ unary_op: PLUS | MINUS | TILDA | NOT; unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | an_id_or_type)))* (COLLATE an_id)?; -unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; +unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; 
in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix; -unary_subexpr: unary_casual_subexpr | json_api_expr; - +unary_subexpr: unary_casual_subexpr | json_api_expr; + in_unary_subexpr: in_unary_casual_subexpr | json_api_expr; - + list_literal: LBRACE_SQUARE expr_list? COMMA? RBRACE_SQUARE; expr_dict_list: expr (COLON expr)? (COMMA expr (COLON expr)?)*; @@ -153,44 +153,44 @@ lambda: smart_parenthesis (ARROW ((LPAREN expr RPAREN) | (LBRACE_CURLY lambda_bo in_expr: in_unary_subexpr; -// ANSI SQL JSON support -json_api_expr: json_value | json_exists | json_query; - +// ANSI SQL JSON support +json_api_expr: json_value | json_exists | json_query; + jsonpath_spec: STRING_VALUE; - -json_variable_name: id_expr | STRING_VALUE; - -json_variable: expr AS json_variable_name; - -json_variables: json_variable (COMMA json_variable)*; - -json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; - -json_case_handler: ERROR | NULL | (DEFAULT expr); - -json_value: JSON_VALUE LPAREN - json_common_args - (RETURNING type_name_simple)? - (json_case_handler ON (EMPTY | ERROR))* -RPAREN; - -json_exists_handler: (TRUE | FALSE | UNKNOWN | ERROR) ON ERROR; - -json_exists: JSON_EXISTS LPAREN - json_common_args - json_exists_handler? -RPAREN; - -json_query_wrapper: (WITHOUT ARRAY?) | (WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY?); -json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT); - -json_query: JSON_QUERY LPAREN - json_common_args - (json_query_wrapper WRAPPER)? - (json_query_handler ON EMPTY)? - (json_query_handler ON ERROR)? -RPAREN; - + +json_variable_name: id_expr | STRING_VALUE; + +json_variable: expr AS json_variable_name; + +json_variables: json_variable (COMMA json_variable)*; + +json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; + +json_case_handler: ERROR | NULL | (DEFAULT expr); + +json_value: JSON_VALUE LPAREN + json_common_args + (RETURNING type_name_simple)? 
+ (json_case_handler ON (EMPTY | ERROR))* +RPAREN; + +json_exists_handler: (TRUE | FALSE | UNKNOWN | ERROR) ON ERROR; + +json_exists: JSON_EXISTS LPAREN + json_common_args + json_exists_handler? +RPAREN; + +json_query_wrapper: (WITHOUT ARRAY?) | (WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY?); +json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT); + +json_query: JSON_QUERY LPAREN + json_common_args + (json_query_wrapper WRAPPER)? + (json_query_handler ON EMPTY)? + (json_query_handler ON ERROR)? +RPAREN; + // struct, tuple or named list smart_parenthesis: LPAREN named_expr_list? COMMA? RPAREN; @@ -787,7 +787,7 @@ keyword_expr_uncompat: | CASE | CAST | CUBE - | CURRENT_DATE + | CURRENT_DATE | CURRENT_TIME | CURRENT_TIMESTAMP | EMPTY_ACTION @@ -795,9 +795,9 @@ keyword_expr_uncompat: | FROM | FULL | HOP - | JSON_EXISTS - | JSON_VALUE - | JSON_QUERY + | JSON_EXISTS + | JSON_VALUE + | JSON_QUERY | LOCAL | NOT | NULL @@ -863,7 +863,7 @@ keyword_compat: ( | ANALYZE | AND | ANSI - | ARRAY + | ARRAY | ASC | ASYNC | ATTACH @@ -877,7 +877,7 @@ keyword_compat: ( | CHECK | COLLATE | COMMIT - | CONDITIONAL + | CONDITIONAL | CONFLICT | CONSTRAINT | COVER @@ -900,14 +900,14 @@ keyword_compat: ( | DROP | EACH | ELSE - | EMPTY + | EMPTY | ENCRYPTED | END - | ERROR + | ERROR | ESCAPE | EVALUATE | EXCLUDE - | EXCLUSION + | EXCLUSION | EXCLUSIVE | EXPLAIN | EXPORT @@ -945,8 +945,8 @@ keyword_compat: ( | NATURAL | NO | NOTNULL - | NULLS - | OBJECT + | NULLS + | OBJECT | OF | OFFSET | ON @@ -956,8 +956,8 @@ keyword_compat: ( | OTHERS | OUTER | OVER - | PARTITION - | PASSING + | PARTITION + | PASSING | PASSWORD | PLAN | PRAGMA @@ -976,7 +976,7 @@ keyword_compat: ( | RESPECT | RESTRICT | RESULT - | RETURNING + | RETURNING | REVERT | RIGHT | RLIKE @@ -995,14 +995,14 @@ keyword_compat: ( | TABLESAMPLE | TEMP | TEMPORARY - | THEN + | THEN | TIES | TO | TRANSACTION | TRIGGER - | UNCONDITIONAL + | UNCONDITIONAL | UNIQUE - | UNKNOWN + | UNKNOWN | UPDATE | UPSERT | USER @@ -1012,7 
+1012,7 @@ keyword_compat: ( | VIEW | VIRTUAL | WITH - | WRAPPER + | WRAPPER | XOR ); @@ -1121,7 +1121,7 @@ ANALYZE: A N A L Y Z E; AND: A N D; ANSI: A N S I; ANY: A N Y; -ARRAY: A R R A Y; +ARRAY: A R R A Y; AS: A S; ASC: A S C; ASSUME: A S S U M E; @@ -1146,7 +1146,7 @@ COLUMN: C O L U M N; COLUMNS: C O L U M N S; COMMIT: C O M M I T; COMPACT: C O M P A C T; -CONDITIONAL: C O N D I T I O N A L; +CONDITIONAL: C O N D I T I O N A L; CONFLICT: C O N F L I C T; CONSTRAINT: C O N S T R A I N T; COVER: C O V E R; @@ -1175,8 +1175,8 @@ DO: D O; DROP: D R O P; EACH: E A C H; ELSE: E L S E; -ERROR: E R R O R; -EMPTY: E M P T Y; +ERROR: E R R O R; +EMPTY: E M P T Y; EMPTY_ACTION: E M P T Y '_' A C T I O N; ENCRYPTED: E N C R Y P T E D; END: E N D; @@ -1243,7 +1243,7 @@ NOT: N O T; NOTNULL: N O T N U L L; NULL: N U L L; NULLS: N U L L S; -OBJECT: O B J E C T; +OBJECT: O B J E C T; OF: O F; OFFSET: O F F S E T; ON: O N; @@ -1255,7 +1255,7 @@ OTHERS: O T H E R S; OUTER: O U T E R; OVER: O V E R; PARTITION: P A R T I T I O N; -PASSING: P A S S I N G; +PASSING: P A S S I N G; PASSWORD: P A S S W O R D; PLAN: P L A N; PRAGMA: P R A G M A; @@ -1280,7 +1280,7 @@ RESPECT: R E S P E C T; RESTRICT: R E S T R I C T; RESULT: R E S U L T; RETURN: R E T U R N; -RETURNING: R E T U R N I N G; +RETURNING: R E T U R N I N G; REVERT: R E V E R T; RIGHT: R I G H T; RLIKE: R L I K E; @@ -1314,10 +1314,10 @@ TRANSACTION: T R A N S A C T I O N; TRIGGER: T R I G G E R; TUPLE: T U P L E; UNBOUNDED: U N B O U N D E D; -UNCONDITIONAL: U N C O N D I T I O N A L; +UNCONDITIONAL: U N C O N D I T I O N A L; UNION: U N I O N; UNIQUE: U N I Q U E; -UNKNOWN: U N K N O W N; +UNKNOWN: U N K N O W N; UPDATE: U P D A T E; UPSERT: U P S E R T; USE: U S E; @@ -1333,7 +1333,7 @@ WHERE: W H E R E; WINDOW: W I N D O W; WITH: W I T H; WITHOUT: W I T H O U T; -WRAPPER: W R A P P E R; +WRAPPER: W R A P P E R; XOR: X O R; TRUE: T R U E; FALSE: F A L S E; diff --git a/ydb/library/yql/sql/v1/aggregation.cpp 
b/ydb/library/yql/sql/v1/aggregation.cpp index 850e0e5056..fb3a694285 100644 --- a/ydb/library/yql/sql/v1/aggregation.cpp +++ b/ydb/library/yql/sql/v1/aggregation.cpp @@ -34,7 +34,7 @@ public: TAggregationFactory(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi = false) : IAggregation(pos, name, func, aggMode), Factory(!func.empty() ? BuildBind(Pos, aggMode == EAggregateMode::OverWindow ? "window_module" : "aggregate_module", func) : nullptr), - Multi(multi), DynamicFactory(!Factory) + Multi(multi), DynamicFactory(!Factory) { if (!Factory) { FakeSource = BuildFakeSource(pos); @@ -369,7 +369,7 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 2; + ui32 adjustArgsCount = isFactory ? 0 : 2; if (exprs.size() != adjustArgsCount) { ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires " << adjustArgsCount << " arguments, given: " << exprs.size(); @@ -1041,9 +1041,9 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 1; - ui32 minArgs = (0 + adjustArgsCount); - ui32 maxArgs = (1 + adjustArgsCount); + ui32 adjustArgsCount = isFactory ? 0 : 1; + ui32 minArgs = (0 + adjustArgsCount); + ui32 maxArgs = (1 + adjustArgsCount); if (exprs.size() < minArgs || exprs.size() > maxArgs) { ctx.Error(Pos) << "List aggregation " << (isFactory ? "factory " : "") << "function require " << minArgs << " or " << maxArgs << " arguments, given: " << exprs.size(); @@ -1117,7 +1117,7 @@ public: private: bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { - ui32 adjustArgsCount = isFactory ? 0 : 1; + ui32 adjustArgsCount = isFactory ? 
0 : 1; if (exprs.size() < (3 + adjustArgsCount) || exprs.size() > (7 + adjustArgsCount)) { ctx.Error(Pos) << "User defined aggregation function " << (isFactory ? "factory " : "") << " requires " << (3 + adjustArgsCount) << " to " << (7 + adjustArgsCount) << " arguments, given: " << exprs.size(); diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp index 43ef1243f1..fb7ea911b7 100644 --- a/ydb/library/yql/sql/v1/builtin.cpp +++ b/ydb/library/yql/sql/v1/builtin.cpp @@ -404,8 +404,8 @@ public: return false; } break; - default: - Y_FAIL("Unexpected data slot"); + default: + Y_FAIL("Unexpected data slot"); } if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) { @@ -980,9 +980,9 @@ TString NormalizeTypeString(const TString& str) { if (ret.StartsWith("Tz")) { ret = "Tz" + to_title(ret.substr(2)); } - if (ret.StartsWith("Json")) { - ret = "Json" + to_title(ret.substr(4)); - } + if (ret.StartsWith("Json")) { + ret = "Json" + to_title(ret.substr(4)); + } if (ret.StartsWith("Dy")) { ret = "Dy" + to_title(ret.substr(2)); } @@ -990,7 +990,7 @@ TString NormalizeTypeString(const TString& str) { return ret; } -static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "JsonDocument", +static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "JsonDocument", "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal", "DyNumber"}; TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) { auto errMsgFunc = [&node, argNum]() { @@ -2223,9 +2223,9 @@ class THoppingTime final: public TAstListNode { public: THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {}) : TAstListNode(pos) - { - Y_UNUSED(args); - } + { + Y_UNUSED(args); 
+ } private: TNodePtr DoClone() const override { @@ -2940,8 +2940,8 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec case NKikimr::NMiniKQL::EScriptType::SystemPython2: scriptType = NKikimr::NMiniKQL::EScriptType::Python2; break; - default: - break; + default: + break; } if (ns == "yql" || ns == "@yql") { @@ -3175,38 +3175,38 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec return args[1]; } else { Y_VERIFY_DEBUG(dynamic_cast<TStructNode*>(args[1].Get())); - auto namedArgs = static_cast<TStructNode*>(args[1].Get()); - return new TStructTypeNode(pos, namedArgs->GetExprs()); + auto namedArgs = static_cast<TStructNode*>(args[1].Get()); + return new TStructTypeNode(pos, namedArgs->GetExprs()); } } } return new TInvalidBuiltin(pos, TStringBuilder() << (normalizedName == "asstruct" ? "AsStruct" : "StructType") << " requires all argument to be named"); - } else if (normalizedName == "expandstruct") { + } else if (normalizedName == "expandstruct") { if (mustUseNamed) { if (!*mustUseNamed) { return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument"); } *mustUseNamed = false; - } - YQL_ENSURE(args.size() == 2); - auto posArgs = static_cast<TTupleNode*>(args[0].Get()); - Y_VERIFY_DEBUG(dynamic_cast<TTupleNode*>(args[0].Get())); - Y_VERIFY_DEBUG(dynamic_cast<TStructNode*>(args[1].Get())); - if (posArgs->GetTupleSize() != 1) { - return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named"); - } - - TVector<TNodePtr> flattenMembersArgs = { - BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}), - BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}), - }; - return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs); + } + YQL_ENSURE(args.size() == 2); + auto posArgs = static_cast<TTupleNode*>(args[0].Get()); + Y_VERIFY_DEBUG(dynamic_cast<TTupleNode*>(args[0].Get())); + 
Y_VERIFY_DEBUG(dynamic_cast<TStructNode*>(args[1].Get())); + if (posArgs->GetTupleSize() != 1) { + return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named"); + } + + TVector<TNodePtr> flattenMembersArgs = { + BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}), + BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}), + }; + return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs); } else if (normalizedName == "sqlexternalfunction") { return new TCallNodeImpl(pos, "SqlExternalFunction", args); - } else { - return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name); + } else { + return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name); } } @@ -3223,8 +3223,8 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec TNodePtr customUserType = nullptr; if (ns == "json") { - ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated. Please use JSON API instead"; - + ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated. 
Please use JSON API instead"; + ns = "yson"; nameSpace = "Yson"; if (name == "Serialize") { diff --git a/ydb/library/yql/sql/v1/context.cpp b/ydb/library/yql/sql/v1/context.cpp index c08fe96cae..d3de353406 100644 --- a/ydb/library/yql/sql/v1/context.cpp +++ b/ydb/library/yql/sql/v1/context.cpp @@ -19,7 +19,7 @@ namespace NSQLTranslationV1 { namespace { TNodePtr AddTablePathPrefix(TContext& ctx, TStringBuf prefixPath, const TDeferredAtom& path) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (prefixPath.empty()) { return path.Build(); } diff --git a/ydb/library/yql/sql/v1/context.h b/ydb/library/yql/sql/v1/context.h index 8951757486..64fc147b52 100644 --- a/ydb/library/yql/sql/v1/context.h +++ b/ydb/library/yql/sql/v1/context.h @@ -236,7 +236,7 @@ namespace NSQLTranslationV1 { bool EnableSystemColumns = true; bool DqEngineEnable = false; bool DqEngineForce = false; - TMaybe<bool> JsonQueryReturnsJsonDocument; + TMaybe<bool> JsonQueryReturnsJsonDocument; TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections; TMaybe<bool> AnsiRankForNullableKeys = true; TMaybe<bool> AnsiOrderByLimitInUnionAll = true; diff --git a/ydb/library/yql/sql/v1/insert.cpp b/ydb/library/yql/sql/v1/insert.cpp index 4f4c863018..5209dcea28 100644 --- a/ydb/library/yql/sql/v1/insert.cpp +++ b/ydb/library/yql/sql/v1/insert.cpp @@ -83,7 +83,7 @@ public: } TNodePtr Build(TContext& ctx) override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); YQL_ENSURE(Values.size() == ColumnsHint.size()); auto structObj = Y("AsStruct"); @@ -119,7 +119,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) override { - Y_UNUSED(src); + Y_UNUSED(src); bool hasError = false; for (const auto& row: Values) { if (ColumnsHint.empty()) { @@ -143,7 +143,7 @@ public: } TNodePtr Build(TContext& ctx) override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); auto tuple = Y(); for (const auto& row: Values) { auto rowValues = Y("AsStruct"); // ordered struct diff --git a/ydb/library/yql/sql/v1/join.cpp b/ydb/library/yql/sql/v1/join.cpp index 29f5b3ec4b..ef18edb7ac 
100644 --- a/ydb/library/yql/sql/v1/join.cpp +++ b/ydb/library/yql/sql/v1/join.cpp @@ -626,7 +626,7 @@ private: const TVector<std::pair<TJoinDescr::TFullColumn, TJoinDescr::TFullColumn>>& keys, bool left ) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); auto res = Y(); for (auto& it: keys) { auto tableName = Sources[left ? it.first.Source : it.second.Source]->GetLabel(); diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index 870bbd9cc7..30671a7972 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -261,7 +261,7 @@ void INode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode: } INode::TPtr INode::WindowSpecFunc(const TPtr& type) const { - Y_UNUSED(type); + Y_UNUSED(type); return {}; } @@ -566,7 +566,7 @@ TString TCallNode::GetCallExplain() const { if (derivedName != OpName) { sb << ", converted to " << OpName << "()"; } - return std::move(sb); + return std::move(sb); } void TCallNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) { @@ -576,18 +576,18 @@ void TCallNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<IN } bool TCallNode::ValidateArguments(TContext& ctx) const { - const auto argsCount = static_cast<i32>(Args.size()); - if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) { + const auto argsCount = static_cast<i32>(Args.size()); + if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) { ctx.Error(Pos) << GetCallExplain() << " requires exactly " << MinArgs << " arguments, given: " << Args.size(); return false; } - if (MinArgs >= 0 && argsCount < MinArgs) { + if (MinArgs >= 0 && argsCount < MinArgs) { ctx.Error(Pos) << GetCallExplain() << " requires at least " << MinArgs << " arguments, given: " << Args.size(); return false; } - if (MaxArgs >= 0 && argsCount > MaxArgs) { + if (MaxArgs >= 0 && argsCount > MaxArgs) { ctx.Error(Pos) << GetCallExplain() << " requires at most " << MaxArgs << " arguments, given: " << 
Args.size(); return false; } @@ -1411,11 +1411,11 @@ TString ISource::MakeLocalName(const TString& name) { TStringBuilder str; str << name << iter->second; ++iter->second; - return std::move(str); + return std::move(str); } bool ISource::AddAggregation(TContext& ctx, TAggregationPtr aggr) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); YQL_ENSURE(aggr); Aggregations.push_back(aggr); return true; @@ -1430,8 +1430,8 @@ void ISource::AddWindowSpecs(TWinSpecs winSpecs) { } bool ISource::AddFuncOverWindow(TContext& ctx, TNodePtr expr) { - Y_UNUSED(ctx); - Y_UNUSED(expr); + Y_UNUSED(ctx); + Y_UNUSED(expr); return false; } @@ -1617,8 +1617,8 @@ bool ISource::SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hin } bool ISource::CalculateGroupingHint(TContext& ctx, const TVector<TString>& columns, ui64& hint) const { - Y_UNUSED(columns); - Y_UNUSED(hint); + Y_UNUSED(columns); + Y_UNUSED(hint); ctx.Error() << "Source not support grouping hint"; return false; } @@ -1982,8 +1982,8 @@ TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label) { } TNodePtr ISource::BuildSort(TContext& ctx, const TString& label) { - Y_UNUSED(ctx); - Y_UNUSED(label); + Y_UNUSED(ctx); + Y_UNUSED(label); return nullptr; } @@ -2386,7 +2386,7 @@ TNodePtr TLiteralNumberNode<T>::DoClone() const { template<typename T> bool TLiteralNumberNode<T>::DoInit(TContext& ctx, ISource* src) { - Y_UNUSED(src); + Y_UNUSED(src); T val; if (!TryFromString(Value, val)) { ctx.Error(Pos) << "Failed to parse " << Value << " as integer literal of " << Type << " type: value out of range for " << Type; @@ -2701,13 +2701,13 @@ TArgPlaceholderNode::TArgPlaceholderNode(TPosition pos, const TString &name) : } bool TArgPlaceholderNode::DoInit(TContext& ctx, ISource* src) { - Y_UNUSED(src); + Y_UNUSED(src); ctx.Error(Pos) << Name << " can't be used as a part of expression."; return false; } TAstNode* TArgPlaceholderNode::Translate(TContext& ctx) const { - Y_UNUSED(ctx); + Y_UNUSED(ctx); return nullptr; } @@ 
-3282,7 +3282,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) final { - Y_UNUSED(src); + Y_UNUSED(src); if (!Node->Init(ctx, FakeSource.Get())) { return false; } diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index 725356b9f5..9349bfd619 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -229,8 +229,8 @@ public: func == "regexp" || func == "regexp_strict" || func == "filter" || func == "filter_strict") { bool isRange = func.StartsWith("range"); bool isFilter = func.StartsWith("filter"); - size_t minArgs = isRange ? 1 : 2; - size_t maxArgs = isRange ? 5 : 4; + size_t minArgs = isRange ? 1 : 2; + size_t maxArgs = isRange ? 5 : 4; if (Args.size() < minArgs || Args.size() > maxArgs) { ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but got: " << Args.size(); return nullptr; @@ -366,8 +366,8 @@ public: return each; } else if (func == "folder") { - size_t minArgs = 1; - size_t maxArgs = 2; + size_t minArgs = 1; + size_t maxArgs = 2; if (Args.size() < minArgs || Args.size() > maxArgs) { ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but found: " << Args.size(); return nullptr; @@ -1672,14 +1672,14 @@ public: BuildQuotedAtom(Pos, "DqEngine"), BuildQuotedAtom(Pos, mode)))); } - if (ctx.JsonQueryReturnsJsonDocument.Defined()) { - TString pragmaName = "DisableJsonQueryReturnsJsonDocument"; - if (*ctx.JsonQueryReturnsJsonDocument) { - pragmaName = "JsonQueryReturnsJsonDocument"; - } - - Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName)))); - } + if (ctx.JsonQueryReturnsJsonDocument.Defined()) { + TString pragmaName = "DisableJsonQueryReturnsJsonDocument"; + if (*ctx.JsonQueryReturnsJsonDocument) { + pragmaName = "JsonQueryReturnsJsonDocument"; + } + + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName)))); + 
} if (ctx.OrderedColumns) { Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, @@ -1791,7 +1791,7 @@ public: } bool DoInit(TContext& ctx, ISource* src) override { - Y_UNUSED(src); + Y_UNUSED(src); TString serviceName; TString cluster; if (std::find(Providers.cbegin(), Providers.cend(), Prefix) != Providers.cend()) { diff --git a/ydb/library/yql/sql/v1/select.cpp b/ydb/library/yql/sql/v1/select.cpp index 2841f05a5b..ee71410b7c 100644 --- a/ydb/library/yql/sql/v1/select.cpp +++ b/ydb/library/yql/sql/v1/select.cpp @@ -226,7 +226,7 @@ public: } bool IsGroupByColumn(const TString& column) const override { - Y_UNUSED(column); + Y_UNUSED(column); return false; } @@ -272,7 +272,7 @@ public: } TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final { - Y_UNUSED(ctx); + Y_UNUSED(ctx); if (UseAllColumns) { return true; } @@ -720,15 +720,15 @@ public: return true; } - bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override { - Y_UNUSED(ctx); + bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override { + Y_UNUSED(ctx); TTableHints merged = contextHints; MergeHints(merged, hints); Table.Options = BuildInputOptions(pos, merged); return true; } - bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { + bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { return Table.Keys->SetViewName(ctx, pos, view); } @@ -788,7 +788,7 @@ public: return true; } - bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { + bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { Y_UNUSED(ctx); ViewPos = pos; View = view; @@ -1337,7 +1337,7 @@ public: return Source.Get(); } - TWriteSettings GetWriteSettings() const override { + TWriteSettings GetWriteSettings() const override { return Settings; } @@ -1411,7 +1411,7 @@ public: , Source(std::move(source)) , 
GroupByExpr(groupByExpr) , GroupBy(groupBy) - , AssumeSorted(assumeSorted) + , AssumeSorted(assumeSorted) , CompactGroupBy(compactGroupBy) , OrderBy(orderBy) , Having(having) @@ -1419,7 +1419,7 @@ public: , Terms(terms) , Without(without) , Distinct(distinct) - , HoppingWindowSpec(hoppingWindowSpec) + , HoppingWindowSpec(hoppingWindowSpec) , SelectStream(selectStream) , Settings(settings) { @@ -2321,7 +2321,7 @@ public: private: TNodePtr BuildColumnsTerms(TContext& ctx) { - Y_UNUSED(ctx); + Y_UNUSED(ctx); TNodePtr terms; Y_VERIFY_DEBUG(Terms.size() == 1); if (Columns.All) { @@ -2404,7 +2404,7 @@ public: } bool CalculateGroupingHint(TContext& ctx, const TVector<TString>& columns, ui64& hint) const override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); hint = 0; if (GroupByColumns.empty()) { for (const auto& groupByNode: GroupBy) { @@ -2658,7 +2658,7 @@ public: } TNodePtr Build(TContext& ctx) override { - Y_UNUSED(ctx); + Y_UNUSED(ctx); Y_FAIL("Unexpected call"); } diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp index ab5a864788..1ac1a814ca 100644 --- a/ydb/library/yql/sql/v1/sql.cpp +++ b/ydb/library/yql/sql/v1/sql.cpp @@ -48,7 +48,7 @@ using NALPDefault::SQLv1LexerTokens; TMutex SanitizerSQLTranslationMutex; #endif -using namespace NSQLv1Generated; +using namespace NSQLv1Generated; static TPosition GetPos(const TToken& token) { return TPosition(token.GetColumn(), token.GetLine()); @@ -947,8 +947,8 @@ private: TMaybe<TExprOrIdent> InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail); TNodePtr JsonInputArg(const TRule_json_common_args& node); - TNodePtr JsonPathSpecification(const TRule_jsonpath_spec& node); - TNodePtr JsonReturningTypeRule(const TRule_type_name_simple& node); + TNodePtr JsonPathSpecification(const TRule_jsonpath_spec& node); + TNodePtr JsonReturningTypeRule(const TRule_type_name_simple& node); TNodePtr JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode); void 
AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children); void AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children); @@ -959,13 +959,13 @@ private: void AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children); TNodePtr JsonExistsExpr(const TRule_json_exists& node); EJsonQueryWrap JsonQueryWrapper(const TRule_json_query& node); - EJsonQueryHandler JsonQueryHandler(const TRule_json_query_handler& node); + EJsonQueryHandler JsonQueryHandler(const TRule_json_query_handler& node); TNodePtr JsonQueryExpr(const TRule_json_query& node); TNodePtr JsonApiExpr(const TRule_json_api_expr& node); - - template<typename TUnaryCasualExprRule> + + template<typename TUnaryCasualExprRule> TNodePtr UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail); - + template<typename TUnarySubExprRule> TNodePtr UnaryExpr(const TUnarySubExprRule& node, const TTrailingQuestions& tail); @@ -1721,8 +1721,8 @@ bool TSqlTranslation::TableRefImpl(const TRule_table_ref& node, TTableRef& resul result.Source = ret; return true; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } MergeHints(hints, tableHints); @@ -2479,8 +2479,8 @@ TNodePtr TSqlTranslation::TypeNodeOrBind(const TRule_type_name_or_bind& node) { } return GetNamedNode(bindName); } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } } @@ -3057,8 +3057,8 @@ TMaybe<TTableHints> TSqlTranslation::TableHintsImpl(const TRule_table_hints& nod break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } if (hasErrors) { return Nothing(); @@ -3643,7 +3643,7 @@ bool ParseNumbers(TContext& 
ctx, const TString& strOrig, ui64& value, TString& s value = 0; const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size()); for (const char& cur: digString) { - const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)]; + const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)]; if (curDigit >= base) { ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur << "' is out of base: " << base; @@ -3756,71 +3756,71 @@ TMaybe<TExprOrIdent> TSqlExpression::LiteralExpr(const TRule_literal_value& node template<typename TUnarySubExprType> TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailingQuestions& tail) { - if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) { - if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) { + if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) { + if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) { return UnaryCasualExpr(node.GetAlt_unary_subexpr1().GetRule_unary_casual_subexpr1(), tail); } else if (tail.Count) { UnexpectedQuestionToken(tail); return {}; - } else { - return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1()); - } - } else { - if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) { + } else { + return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1()); + } + } else { + if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) { return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail); } else if (tail.Count) { UnexpectedQuestionToken(tail); return {}; - } else { + } else { return JsonApiExpr(node.GetAlt_in_unary_subexpr2().GetRule_json_api_expr1()); - } - } -} + } + } +} -TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) { - /* +TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) { + /* jsonpath_spec: STRING_VALUE; - */ + 
*/ TString value = Token(node.GetToken1()); TPosition pos = Ctx.Pos(); - + auto parsed = StringContent(Ctx, pos, value); if (!parsed) { - return nullptr; - } + return nullptr; + } return new TCallNodeImpl(pos, "Utf8", {BuildQuotedAtom(pos, parsed->Content, parsed->Flags)}); -} - -TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) { - /* - (RETURNING type_name_simple)? - */ +} + +TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) { + /* + (RETURNING type_name_simple)? + */ return TypeSimple(node, /* onlyDataAllowed */ true); -} - +} + TNodePtr TSqlExpression::JsonInputArg(const TRule_json_common_args& node) { /* json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; */ TNodePtr jsonExpr = Build(node.GetRule_expr1()); - if (!jsonExpr || jsonExpr->IsNull()) { - jsonExpr = new TCallNodeImpl(Ctx.Pos(), "Nothing", { - new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")}) - }); - } - - return jsonExpr; -} - + if (!jsonExpr || jsonExpr->IsNull()) { + jsonExpr = new TCallNodeImpl(Ctx.Pos(), "Nothing", { + new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")}) + }); + } + + return jsonExpr; +} + void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children) { - /* - json_variable: expr AS json_variable_name; - */ - TNodePtr expr; - TString rawName; - TPosition namePos = Ctx.Pos(); - ui32 nameFlags = 0; - + /* + json_variable: expr AS json_variable_name; + */ + TNodePtr expr; + TString rawName; + TPosition namePos = Ctx.Pos(); + ui32 nameFlags = 0; + expr = Build(node.GetRule_expr1()); const auto& nameRule = node.GetRule_json_variable_name3(); switch (nameRule.GetAltCase()) { @@ -3834,165 +3834,165 @@ void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TN auto parsed = StringContentOrIdContent(Ctx, namePos, token.GetValue()); if (!parsed) { return; - } + } rawName = parsed->Content; 
nameFlags = parsed->Flags; break; - } + } default: Y_FAIL("You should change implementation according to grammar changes"); - } - - TNodePtr nameExpr = BuildQuotedAtom(namePos, rawName, nameFlags); - children.push_back(BuildTuple(namePos, {nameExpr, expr})); -} - + } + + TNodePtr nameExpr = BuildQuotedAtom(namePos, rawName, nameFlags); + children.push_back(BuildTuple(namePos, {nameExpr, expr})); +} + void TSqlExpression::AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children) { - /* - json_variables: json_variable (COMMA json_variable)*; - */ + /* + json_variables: json_variable (COMMA json_variable)*; + */ AddJsonVariable(node.GetRule_json_variable1(), children); - for (size_t i = 0; i < node.Block2Size(); i++) { - AddJsonVariable(node.GetBlock2(i).GetRule_json_variable2(), children); - } -} - + for (size_t i = 0; i < node.Block2Size(); i++) { + AddJsonVariable(node.GetBlock2(i).GetRule_json_variable2(), children); + } +} + TNodePtr TSqlExpression::JsonVariables(const TRule_json_common_args& node) { - /* + /* json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; - */ - TVector<TNodePtr> variables; - TPosition pos = Ctx.Pos(); - if (node.HasBlock4()) { - const auto& block = node.GetBlock4(); - pos = GetPos(block.GetToken1()); + */ + TVector<TNodePtr> variables; + TPosition pos = Ctx.Pos(); + if (node.HasBlock4()) { + const auto& block = node.GetBlock4(); + pos = GetPos(block.GetToken1()); AddJsonVariables(block.GetRule_json_variables2(), variables); - } - return new TCallNodeImpl(pos, "JsonVariables", variables); -} - + } + return new TCallNodeImpl(pos, "JsonVariables", variables); +} + void TSqlExpression::AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children) { - /* - json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; - */ - TNodePtr jsonExpr = JsonInputArg(node); - TNodePtr jsonPath = JsonPathSpecification(node.GetRule_jsonpath_spec3()); - TNodePtr variables = 
JsonVariables(node); - - children.push_back(jsonExpr); - children.push_back(jsonPath); - children.push_back(variables); -} - + /* + json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; + */ + TNodePtr jsonExpr = JsonInputArg(node); + TNodePtr jsonPath = JsonPathSpecification(node.GetRule_jsonpath_spec3()); + TNodePtr variables = JsonVariables(node); + + children.push_back(jsonExpr); + children.push_back(jsonPath); + children.push_back(variables); +} + TNodePtr TSqlExpression::JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode) { - /* - json_case_handler: ERROR | NULL | (DEFAULT expr); - */ - + /* + json_case_handler: ERROR | NULL | (DEFAULT expr); + */ + switch (node.GetAltCase()) { case TRule_json_case_handler::kAltJsonCaseHandler1: { const auto pos = GetPos(node.GetAlt_json_case_handler1().GetToken1()); mode = EJsonValueHandlerMode::Error; return new TCallNodeImpl(pos, "Null", {}); - } + } case TRule_json_case_handler::kAltJsonCaseHandler2: { const auto pos = GetPos(node.GetAlt_json_case_handler2().GetToken1()); mode = EJsonValueHandlerMode::DefaultValue; return new TCallNodeImpl(pos, "Null", {}); - } + } case TRule_json_case_handler::kAltJsonCaseHandler3: mode = EJsonValueHandlerMode::DefaultValue; return Build(node.GetAlt_json_case_handler3().GetBlock1().GetRule_expr2()); default: Y_FAIL("You should change implementation according to grammar changes"); - } -} - + } +} + void TSqlExpression::AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children) { - /* - json_case_handler* - */ - if (node.Block5Size() > 2) { - Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected"; - Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses"); - return; - } - - TNodePtr onEmpty; - EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue; - TNodePtr onError; - EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue; - for (size_t i = 0; i < 
node.Block5Size(); i++) { - const auto block = node.GetBlock5(i); - const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty"; - - if (isEmptyClause && onEmpty != nullptr) { - Ctx.Error() << "Only 1 ON EMPTY clause is expected"; - Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses"); - return; - } - - if (!isEmptyClause && onError != nullptr) { - Ctx.Error() << "Only 1 ON ERROR clause is expected"; - Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses"); - return; - } - - if (isEmptyClause && onError != nullptr) { - Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause"; - Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError"); - return; - } - - EJsonValueHandlerMode currentMode; + /* + json_case_handler* + */ + if (node.Block5Size() > 2) { + Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses"); + return; + } + + TNodePtr onEmpty; + EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue; + TNodePtr onError; + EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue; + for (size_t i = 0; i < node.Block5Size(); i++) { + const auto block = node.GetBlock5(i); + const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty"; + + if (isEmptyClause && onEmpty != nullptr) { + Ctx.Error() << "Only 1 ON EMPTY clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses"); + return; + } + + if (!isEmptyClause && onError != nullptr) { + Ctx.Error() << "Only 1 ON ERROR clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses"); + return; + } + + if (isEmptyClause && onError != nullptr) { + Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError"); + return; + } + + EJsonValueHandlerMode currentMode; TNodePtr 
currentHandler = JsonValueCaseHandler(block.GetRule_json_case_handler1(), currentMode); - - if (isEmptyClause) { - onEmpty = currentHandler; - onEmptyMode = currentMode; - } else { - onError = currentHandler; - onErrorMode = currentMode; - } - } - - if (onEmpty == nullptr) { - onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {}); - } - - if (onError == nullptr) { - onError = new TCallNodeImpl(Ctx.Pos(), "Null", {}); - } - - children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default)); - children.push_back(onEmpty); - children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default)); - children.push_back(onError); -} - + + if (isEmptyClause) { + onEmpty = currentHandler; + onEmptyMode = currentMode; + } else { + onError = currentHandler; + onErrorMode = currentMode; + } + } + + if (onEmpty == nullptr) { + onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {}); + } + + if (onError == nullptr) { + onError = new TCallNodeImpl(Ctx.Pos(), "Null", {}); + } + + children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default)); + children.push_back(onEmpty); + children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default)); + children.push_back(onError); +} + TNodePtr TSqlExpression::JsonValueExpr(const TRule_json_value& node) { - /* - json_value: JSON_VALUE LPAREN - json_common_args - (RETURNING type_name_simple)? + /* + json_value: JSON_VALUE LPAREN + json_common_args + (RETURNING type_name_simple)? 
(json_case_handler ON (EMPTY | ERROR))* - RPAREN; - */ - TVector<TNodePtr> children; + RPAREN; + */ + TVector<TNodePtr> children; AddJsonCommonArgs(node.GetRule_json_common_args3(), children); - AddJsonValueCaseHandlers(node, children); - - if (node.HasBlock4()) { - auto returningType = JsonReturningTypeRule(node.GetBlock4().GetRule_type_name_simple2()); + AddJsonValueCaseHandlers(node, children); + + if (node.HasBlock4()) { + auto returningType = JsonReturningTypeRule(node.GetBlock4().GetRule_type_name_simple2()); if (!returningType) { return {}; } - children.push_back(returningType); - } - - return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonValue", children); -} - + children.push_back(returningType); + } + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonValue", children); +} + void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children) { /* json_exists: JSON_EXISTS LPAREN @@ -4001,154 +4001,154 @@ void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector RPAREN; */ auto buildJustBool = [&](const TPosition& pos, bool value) { - return new TCallNodeImpl(pos, "Just", {BuildLiteralBool(pos, value)}); - }; - - if (!node.HasBlock4()) { + return new TCallNodeImpl(pos, "Just", {BuildLiteralBool(pos, value)}); + }; + + if (!node.HasBlock4()) { children.push_back(buildJustBool(Ctx.Pos(), false)); - return; - } - - const auto& handlerRule = node.GetBlock4().GetRule_json_exists_handler1(); - const auto& token = handlerRule.GetToken1(); - const auto pos = GetPos(token); - const auto mode = to_lower(token.GetValue()); - if (mode == "unknown") { - const auto nothingNode = new TCallNodeImpl(pos, "Nothing", { - new TCallNodeImpl(pos, "OptionalType", {BuildDataType(pos, "Bool")}) - }); - children.push_back(nothingNode); - } else if (mode != "error") { + return; + } + + const auto& handlerRule = node.GetBlock4().GetRule_json_exists_handler1(); + const auto& token = handlerRule.GetToken1(); + const auto 
pos = GetPos(token); + const auto mode = to_lower(token.GetValue()); + if (mode == "unknown") { + const auto nothingNode = new TCallNodeImpl(pos, "Nothing", { + new TCallNodeImpl(pos, "OptionalType", {BuildDataType(pos, "Bool")}) + }); + children.push_back(nothingNode); + } else if (mode != "error") { children.push_back(buildJustBool(pos, FromString<bool>(mode))); - } -} - + } +} + TNodePtr TSqlExpression::JsonExistsExpr(const TRule_json_exists& node) { - /* - json_exists: JSON_EXISTS LPAREN - json_common_args - json_exists_handler? - RPAREN; - */ - TVector<TNodePtr> children; + /* + json_exists: JSON_EXISTS LPAREN + json_common_args + json_exists_handler? + RPAREN; + */ + TVector<TNodePtr> children; AddJsonCommonArgs(node.GetRule_json_common_args3(), children); - - AddJsonExistsHandler(node, children); - - return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonExists", children); -} - + + AddJsonExistsHandler(node, children); + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonExists", children); +} + EJsonQueryWrap TSqlExpression::JsonQueryWrapper(const TRule_json_query& node) { - /* + /* json_query: JSON_QUERY LPAREN json_common_args (json_query_wrapper WRAPPER)? (json_query_handler ON EMPTY)? (json_query_handler ON ERROR)? RPAREN; - */ - // default behaviour - no wrapping - if (!node.HasBlock4()) { - return EJsonQueryWrap::NoWrap; - } - - // WITHOUT ARRAY? - no wrapping - const auto& wrapperRule = node.GetBlock4().GetRule_json_query_wrapper1(); - if (wrapperRule.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) { - return EJsonQueryWrap::NoWrap; - } - - // WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY? - wrapping depends on 2nd token. 
Default is UNCONDITIONAL - const auto& withWrapperRule = wrapperRule.GetAlt_json_query_wrapper2().GetBlock1(); - if (!withWrapperRule.HasBlock2()) { - return EJsonQueryWrap::Wrap; - } - - const auto& token = withWrapperRule.GetBlock2().GetToken1(); - if (to_lower(token.GetValue()) == "conditional") { - return EJsonQueryWrap::ConditionalWrap; - } else { - return EJsonQueryWrap::Wrap; - } -} - -EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) { - /* - json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT); - */ - switch (node.GetAltCase()) { - case TRule_json_query_handler::kAltJsonQueryHandler1: - return EJsonQueryHandler::Error; - case TRule_json_query_handler::kAltJsonQueryHandler2: - return EJsonQueryHandler::Null; - case TRule_json_query_handler::kAltJsonQueryHandler3: - return EJsonQueryHandler::EmptyArray; - case TRule_json_query_handler::kAltJsonQueryHandler4: - return EJsonQueryHandler::EmptyObject; - default: - Y_FAIL("You should change implementation according to grammar changes"); - } -} - + */ + // default behaviour - no wrapping + if (!node.HasBlock4()) { + return EJsonQueryWrap::NoWrap; + } + + // WITHOUT ARRAY? - no wrapping + const auto& wrapperRule = node.GetBlock4().GetRule_json_query_wrapper1(); + if (wrapperRule.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) { + return EJsonQueryWrap::NoWrap; + } + + // WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY? - wrapping depends on 2nd token. 
Default is UNCONDITIONAL + const auto& withWrapperRule = wrapperRule.GetAlt_json_query_wrapper2().GetBlock1(); + if (!withWrapperRule.HasBlock2()) { + return EJsonQueryWrap::Wrap; + } + + const auto& token = withWrapperRule.GetBlock2().GetToken1(); + if (to_lower(token.GetValue()) == "conditional") { + return EJsonQueryWrap::ConditionalWrap; + } else { + return EJsonQueryWrap::Wrap; + } +} + +EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) { + /* + json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT); + */ + switch (node.GetAltCase()) { + case TRule_json_query_handler::kAltJsonQueryHandler1: + return EJsonQueryHandler::Error; + case TRule_json_query_handler::kAltJsonQueryHandler2: + return EJsonQueryHandler::Null; + case TRule_json_query_handler::kAltJsonQueryHandler3: + return EJsonQueryHandler::EmptyArray; + case TRule_json_query_handler::kAltJsonQueryHandler4: + return EJsonQueryHandler::EmptyObject; + default: + Y_FAIL("You should change implementation according to grammar changes"); + } +} + TNodePtr TSqlExpression::JsonQueryExpr(const TRule_json_query& node) { - /* - json_query: JSON_QUERY LPAREN - json_common_args + /* + json_query: JSON_QUERY LPAREN + json_common_args (json_query_wrapper WRAPPER)? - (json_query_handler ON EMPTY)? - (json_query_handler ON ERROR)? - RPAREN; - */ - - TVector<TNodePtr> children; + (json_query_handler ON EMPTY)? + (json_query_handler ON ERROR)? 
+ RPAREN; + */ + + TVector<TNodePtr> children; AddJsonCommonArgs(node.GetRule_json_common_args3(), children); - - auto addChild = [&](TPosition pos, const TString& content) { - children.push_back(BuildQuotedAtom(pos, content, TNodeFlags::Default)); - }; - - const auto wrapMode = JsonQueryWrapper(node); - addChild(Ctx.Pos(), ToString(wrapMode)); - - auto onEmpty = EJsonQueryHandler::Null; - if (node.HasBlock5()) { - if (wrapMode != EJsonQueryWrap::NoWrap) { - Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified"; - Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper"); - return nullptr; - } - onEmpty = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1()); - } - addChild(Ctx.Pos(), ToString(onEmpty)); - - auto onError = EJsonQueryHandler::Null; - if (node.HasBlock6()) { - onError = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1()); - } - addChild(Ctx.Pos(), ToString(onError)); - - return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonQuery", children); -} - + + auto addChild = [&](TPosition pos, const TString& content) { + children.push_back(BuildQuotedAtom(pos, content, TNodeFlags::Default)); + }; + + const auto wrapMode = JsonQueryWrapper(node); + addChild(Ctx.Pos(), ToString(wrapMode)); + + auto onEmpty = EJsonQueryHandler::Null; + if (node.HasBlock5()) { + if (wrapMode != EJsonQueryWrap::NoWrap) { + Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified"; + Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper"); + return nullptr; + } + onEmpty = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1()); + } + addChild(Ctx.Pos(), ToString(onEmpty)); + + auto onError = EJsonQueryHandler::Null; + if (node.HasBlock6()) { + onError = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1()); + } + addChild(Ctx.Pos(), ToString(onError)); + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonQuery", children); +} + TNodePtr 
TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) { - /* - json_api_expr: json_value | json_exists | json_query; - */ - TPosition pos = Ctx.Pos(); - TNodePtr result = nullptr; + /* + json_api_expr: json_value | json_exists | json_query; + */ + TPosition pos = Ctx.Pos(); + TNodePtr result = nullptr; switch (node.GetAltCase()) { case TRule_json_api_expr::kAltJsonApiExpr1: { const auto& jsonValue = node.GetAlt_json_api_expr1().GetRule_json_value1(); pos = GetPos(jsonValue.GetToken1()); result = JsonValueExpr(jsonValue); break; - } + } case TRule_json_api_expr::kAltJsonApiExpr2: { const auto& jsonExists = node.GetAlt_json_api_expr2().GetRule_json_exists1(); pos = GetPos(jsonExists.GetToken1()); result = JsonExistsExpr(jsonExists); break; - } + } case TRule_json_api_expr::kAltJsonApiExpr3: { const auto& jsonQuery = node.GetAlt_json_api_expr3().GetRule_json_query1(); pos = GetPos(jsonQuery.GetToken1()); @@ -4157,19 +4157,19 @@ TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) { } default: Y_FAIL("You should change implementation according to grammar changes"); - } - - return result; -} - -template<typename TUnaryCasualExprRule> + } + + return result; +} + +template<typename TUnaryCasualExprRule> TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) { - // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; + // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; // OR // in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix; - // where - // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?; - + // where + // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?; + const auto& suffix = node.GetRule_unary_subexpr_suffix2(); const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2(); TString name; @@ -4177,9 +4177,9 @@ TNodePtr 
TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const bool typePossible = false; auto& block = node.GetBlock1(); switch (block.Alt_case()) { - case TUnaryCasualExprRule::TBlock1::kAlt1: { + case TUnaryCasualExprRule::TBlock1::kAlt1: { auto& alt = block.GetAlt1(); - if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { name = Id(alt.GetRule_id_expr1(), *this); typePossible = !IsQuotedId(alt.GetRule_id_expr1(), *this); } else { @@ -4188,10 +4188,10 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const } break; } - case TUnaryCasualExprRule::TBlock1::kAlt2: { - auto& alt = block.GetAlt2(); + case TUnaryCasualExprRule::TBlock1::kAlt2: { + auto& alt = block.GetAlt2(); TMaybe<TExprOrIdent> exprOrId; - if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{}); } else { exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? 
tail : TTrailingQuestions{}); @@ -4208,8 +4208,8 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const } break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } bool onlyDots = true; @@ -4352,8 +4352,8 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const ids.push_back(Id(bb.GetAlt3().GetRule_an_id_or_type1(), *this)); break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } if (lastExpr) { @@ -4638,8 +4638,8 @@ TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const } break; } - default: - Y_FAIL("Unsigned number: you should change implementation according to grammar changes"); + default: + Y_FAIL("Unsigned number: you should change implementation according to grammar changes"); } result.Expr = BuildCallable(pos, module, name, {}); break; @@ -4711,7 +4711,7 @@ TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, TPosition pos(Ctx.Pos()); TString name; switch (alt.GetBlock3().Alt_case()) { - case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this); break; case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { @@ -4724,8 +4724,8 @@ TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, } break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } result.Expr = BuildCallable(pos, module, name, {}); break; @@ -4752,13 +4752,13 @@ TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, case TRule_in_atom_expr::kAltInAtomExpr9: result.Expr = 
BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1()); break; - case TRule_in_atom_expr::kAltInAtomExpr10: + case TRule_in_atom_expr::kAltInAtomExpr10: result.Expr = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1()); break; - case TRule_in_atom_expr::kAltInAtomExpr11: + case TRule_in_atom_expr::kAltInAtomExpr11: result.Expr = DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1()); break; - case TRule_in_atom_expr::kAltInAtomExpr12: + case TRule_in_atom_expr::kAltInAtomExpr12: result.Expr = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1()); break; default: @@ -4902,8 +4902,8 @@ TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQ auto expr = UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2(), tail); return expr ? expr->ApplyUnaryOp(Ctx, pos, opName) : expr; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } return nullptr; } @@ -5323,8 +5323,8 @@ TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNo Ctx.IncrementMonCounter("sql_binary_operations", "BitXor"); break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? 
tail : TTrailingQuestions{})); @@ -7259,8 +7259,8 @@ bool TGroupByClause::GroupingElement(const TRule_grouping_element& node, EGroupB } break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } return true; } @@ -7611,8 +7611,8 @@ TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind& no placement, res.SelectOpOrderBy, res.SelectOpAssumeOrderBy); break; } - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } return res; @@ -7941,8 +7941,8 @@ TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) { case TRule_into_table_stmt_TBlock1::AltCase::kAlt6: modeTokens = {modeBlock.GetAlt6().GetToken1()}; break; - default: - Y_FAIL("You should change implementation according to grammar changes"); + default: + Y_FAIL("You should change implementation according to grammar changes"); } TVector<TString> modeStrings; @@ -8086,7 +8086,7 @@ TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) { bool TSqlIntoTable::ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table, ESQLWriteColumnMode mode, const TPosition& pos) { - Y_UNUSED(node); + Y_UNUSED(node); auto serviceName = table.Service; const bool isMapReduce = serviceName == YtProviderName; const bool isKikimr = serviceName == KikimrProviderName || serviceName == YdbProviderName; @@ -9523,12 +9523,12 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success return {}; } Ctx.IncrementMonCounter("sql_pragma", "RegexUseRe2"); - } else if (normalizedPragma == "jsonqueryreturnsjsondocument") { - Ctx.JsonQueryReturnsJsonDocument = true; - Ctx.IncrementMonCounter("sql_pragma", "JsonQueryReturnsJsonDocument"); - } else if (normalizedPragma == "disablejsonqueryreturnsjsondocument") { - 
Ctx.JsonQueryReturnsJsonDocument = false; - Ctx.IncrementMonCounter("sql_pragma", "DisableJsonQueryReturnsJsonDocument"); + } else if (normalizedPragma == "jsonqueryreturnsjsondocument") { + Ctx.JsonQueryReturnsJsonDocument = true; + Ctx.IncrementMonCounter("sql_pragma", "JsonQueryReturnsJsonDocument"); + } else if (normalizedPragma == "disablejsonqueryreturnsjsondocument") { + Ctx.JsonQueryReturnsJsonDocument = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableJsonQueryReturnsJsonDocument"); } else if (normalizedPragma == "orderedcolumns") { Ctx.OrderedColumns = true; Ctx.IncrementMonCounter("sql_pragma", "OrderedColumns"); diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index 85b6e0dfcd..3166327e13 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -3176,7 +3176,7 @@ select FormatType($f()); ExpectFailWithError("pragma ClassicDivision = 'false'; select $foo / 30;", "<main>:1:42: Error: Unknown name: $foo\n"); } } - + void CheckUnused(const TString& req, const TString& symbol, unsigned row, unsigned col) { auto res = SqlToYql(req); @@ -3404,415 +3404,415 @@ Y_UNIT_TEST_SUITE(AnonymousNames) { } } -Y_UNIT_TEST_SUITE(JsonValue) { - Y_UNIT_TEST(JsonValueArgumentCount) { - NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));"); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Unexpected token ')' : syntax error...\n\n"); - } - - Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) { - NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);"); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Error: Unexpected token absence : Missing STRING_VALUE \n\n"); - } - - Y_UNIT_TEST(JsonValueTranslation) { - NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");"); - - 
UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); - }; - - TWordCountHive elementStat({"JsonValue"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]); - } - - Y_UNIT_TEST(JsonValueReturningSection) { - for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) { - NYql::TAstParseResult res = SqlToYql( - TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");" - ); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName)); - }; - - TWordCountHive elementStat({typeName}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat[typeName] > 0); - } - } - - Y_UNIT_TEST(JsonValueInvalidReturningType) { +Y_UNIT_TEST_SUITE(JsonValue) { + Y_UNIT_TEST(JsonValueArgumentCount) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Unexpected token ')' : syntax error...\n\n"); + } + + Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) { + NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), 
"<main>:1:79: Error: Unexpected token absence : Missing STRING_VALUE \n\n"); + } + + Y_UNIT_TEST(JsonValueTranslation) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]); + } + + Y_UNIT_TEST(JsonValueReturningSection) { + for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName)); + }; + + TWordCountHive elementStat({typeName}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat[typeName] > 0); + } + } + + Y_UNIT_TEST(JsonValueInvalidReturningType) { NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{'key': 1238}@@ as Json), 'strict $.key' RETURNING invalid);"); - - UNIT_ASSERT(!res.Root); + + UNIT_ASSERT(!res.Root); UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Unknown simple type 'invalid'\n"); - } - - Y_UNIT_TEST(JsonValueAndReturningInExpressions) { 
- NYql::TAstParseResult res = SqlToYql( - "USE plato\n;" - "$json_value = \"some string\";\n" - "SELECT $json_value;\n" - "SELECT 1 as json_value;\n" - "SELECT $json_value as json_value;\n" - "$returning = \"another string\";\n" - "SELECT $returning;\n" - "SELECT 1 as returning;\n" - "SELECT $returning as returning;\n" - "SELECT returning FROM InputSyntax;\n" - "SELECT returning, count(*) FROM InputSyntax GROUP BY returning;\n" - ); - - UNIT_ASSERT(res.Root); - } - - Y_UNIT_TEST(JsonValueValidCaseHandlers) { - const TVector<std::pair<TString, TString>> testCases = { - {"", "'DefaultValue (Null)"}, - {"NULL", "'DefaultValue (Null)"}, - {"ERROR", "'Error (Null)"}, - {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"}, - }; - - for (const auto& onEmpty : testCases) { - for (const auto& onError : testCases) { - TStringBuilder query; - query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n" - << "SELECT JSON_VALUE($json, \"strict $.key\""; - if (!onEmpty.first.Empty()) { - query << " " << onEmpty.first << " ON EMPTY"; - } - if (!onError.first.Empty()) { - query << " " << onError.first << " ON ERROR"; - } - query << ");\n"; - - NYql::TAstParseResult res = SqlToYql(query); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second)); - }; - - TWordCountHive elementStat({"JsonValue"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonValue"] > 0); - } - } - } - - Y_UNIT_TEST(JsonValueTooManyCaseHandlers) { - NYql::TAstParseResult res = SqlToYql( - "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n" - ); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF( - Err2Str(res), - "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n" - ); - } - - Y_UNIT_TEST(JsonValueTooManyOnEmpty) { - NYql::TAstParseResult res 
= SqlToYql( - "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n" - ); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF( - Err2Str(res), - "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n" - ); - } - - Y_UNIT_TEST(JsonValueTooManyOnError) { - NYql::TAstParseResult res = SqlToYql( - "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n" - ); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF( - Err2Str(res), - "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n" - ); - } - - Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) { - NYql::TAstParseResult res = SqlToYql( - "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n" - ); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF( - Err2Str(res), - "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n" - ); - } - - Y_UNIT_TEST(JsonValueNullInput) { - NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)"); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); - }; - - TWordCountHive elementStat({"JsonValue"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonValue"] > 0); - } -} - -Y_UNIT_TEST_SUITE(JsonExists) { - Y_UNIT_TEST(JsonExistsValidHandlers) { - const TVector<std::pair<TString, TString>> testCases = { - {"", "(Just (Bool '\"false\"))"}, - {"TRUE ON ERROR", "(Just (Bool '\"true\"))"}, - {"FALSE ON ERROR", "(Just (Bool '\"false\"))"}, - {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"}, - // NOTE: in this case we expect arguments of JsonExists callable to end immediately - // after variables. 
This parenthesis at the end of the expression is left on purpose - {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"}, - }; - - for (const auto& item : testCases) { - NYql::TAstParseResult res = SqlToYql( - TStringBuilder() << R"( - $json = CAST(@@{"key": 1238}@@ as Json); - SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n" - ); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second)); - }; - - TWordCountHive elementStat({"JsonExists"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonExists"] > 0); - } - } - - Y_UNIT_TEST(JsonExistsInvalidHandler) { - NYql::TAstParseResult res = SqlToYql(R"( - $json = CAST(@@{"key": 1238}@@ as Json); - $default = false; - SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR); - )"); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: Unexpected token absence : Missing RPAREN \n\n"); - } - - Y_UNIT_TEST(JsonExistsNullInput) { - NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)"); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); - }; - - TWordCountHive elementStat({"JsonExists"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonExists"] > 0); - } + } + + Y_UNIT_TEST(JsonValueAndReturningInExpressions) { + NYql::TAstParseResult res = SqlToYql( + "USE plato\n;" + "$json_value = \"some string\";\n" + "SELECT $json_value;\n" + "SELECT 1 as json_value;\n" + "SELECT $json_value as json_value;\n" + "$returning = \"another string\";\n" + "SELECT $returning;\n" + "SELECT 1 as returning;\n" + "SELECT $returning as returning;\n" + "SELECT returning FROM InputSyntax;\n" + 
"SELECT returning, count(*) FROM InputSyntax GROUP BY returning;\n" + ); + + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(JsonValueValidCaseHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "'DefaultValue (Null)"}, + {"NULL", "'DefaultValue (Null)"}, + {"ERROR", "'Error (Null)"}, + {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"}, + }; + + for (const auto& onEmpty : testCases) { + for (const auto& onError : testCases) { + TStringBuilder query; + query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n" + << "SELECT JSON_VALUE($json, \"strict $.key\""; + if (!onEmpty.first.Empty()) { + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.Empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second)); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } + } + } + + Y_UNIT_TEST(JsonValueTooManyCaseHandlers) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnEmpty) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnError) { + NYql::TAstParseResult res = SqlToYql( + "select 
JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n" + ); + } + + Y_UNIT_TEST(JsonValueNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonExists) { + Y_UNIT_TEST(JsonExistsValidHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "(Just (Bool '\"false\"))"}, + {"TRUE ON ERROR", "(Just (Bool '\"true\"))"}, + {"FALSE ON ERROR", "(Just (Bool '\"false\"))"}, + {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"}, + // NOTE: in this case we expect arguments of JsonExists callable to end immediately + // after variables. 
This parenthesis at the end of the expression is left on purpose + {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"}, + }; + + for (const auto& item : testCases) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second)); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } + } + + Y_UNIT_TEST(JsonExistsInvalidHandler) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + $default = false; + SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: Unexpected token absence : Missing RPAREN \n\n"); + } + + Y_UNIT_TEST(JsonExistsNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonQuery) { + Y_UNIT_TEST(JsonQueryValidHandlers) { + using TTestSuite = const TVector<std::pair<TString, TString>>; + TTestSuite wrapCases = { + {"", "'NoWrap"}, + {"WITHOUT WRAPPER", "'NoWrap"}, + {"WITHOUT ARRAY WRAPPER", "'NoWrap"}, + {"WITH WRAPPER", "'Wrap"}, + {"WITH ARRAY WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"}, + 
{"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"}, + {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"}, + }; + TTestSuite handlerCases = { + {"", "'Null"}, + {"ERROR", "'Error"}, + {"NULL", "'Null"}, + {"EMPTY ARRAY", "'EmptyArray"}, + {"EMPTY OBJECT", "'EmptyObject"}, + }; + + for (const auto wrap : wrapCases) { + for (const auto onError : handlerCases) { + for (const auto onEmpty : handlerCases) { + TStringBuilder query; + query << R"($json = CAST(@@{"key": [123]}@@ as Json); + SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first; + if (!onEmpty.first.Empty()) { + if (wrap.first.StartsWith("WITH ")) { + continue; + } + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.Empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second; + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args)); + }; + + Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } + } + } + } + + Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n"); + } + + Y_UNIT_TEST(JsonQueryNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } } - -Y_UNIT_TEST_SUITE(JsonQuery) { - Y_UNIT_TEST(JsonQueryValidHandlers) { - using TTestSuite = const TVector<std::pair<TString, TString>>; - TTestSuite wrapCases = { - {"", "'NoWrap"}, - {"WITHOUT WRAPPER", "'NoWrap"}, - {"WITHOUT ARRAY WRAPPER", "'NoWrap"}, - {"WITH WRAPPER", "'Wrap"}, - {"WITH ARRAY WRAPPER", "'Wrap"}, - {"WITH UNCONDITIONAL WRAPPER", "'Wrap"}, - {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"}, - {"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"}, - {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"}, - }; - TTestSuite handlerCases = { - {"", "'Null"}, - {"ERROR", "'Error"}, - {"NULL", "'Null"}, - {"EMPTY ARRAY", "'EmptyArray"}, - {"EMPTY OBJECT", "'EmptyObject"}, - }; - - for (const auto wrap : wrapCases) { - for (const auto onError : handlerCases) { - for (const auto onEmpty : handlerCases) { - TStringBuilder query; - query << R"($json = CAST(@@{"key": [123]}@@ as Json); - SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first; - if (!onEmpty.first.Empty()) { - if (wrap.first.StartsWith("WITH ")) { - continue; - } - query << " " << onEmpty.first << " ON EMPTY"; - } - if (!onError.first.Empty()) { - query << " " << onError.first << " ON ERROR"; - } - query << ");\n"; - - NYql::TAstParseResult res = SqlToYql(query); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second; - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args)); - }; - - Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl; - - TWordCountHive elementStat({"JsonQuery"}); - VerifyProgram(res, elementStat, verifyLine); - 
UNIT_ASSERT(elementStat["JsonQuery"] > 0); - } - } - } - } - - Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) { - NYql::TAstParseResult res = SqlToYql(R"( - $json = CAST(@@{"key": 1238}@@ as Json); - SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY); - )"); - - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n"); - } - - Y_UNIT_TEST(JsonQueryNullInput) { - NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)"); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); - }; - - TWordCountHive elementStat({"JsonQuery"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonQuery"] > 0); - } -} - -Y_UNIT_TEST_SUITE(JsonPassing) { - Y_UNIT_TEST(SupportedVariableTypes) { - const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; - - for (const auto& function : functions) { - const auto query = Sprintf(R"( - $json = CAST(@@{"key": 1238}@@ as Json); - SELECT %s( - $json, - "strict $.key" - PASSING - "string" as var1, - 1.234 as var2, - CAST(1 as Int64) as var3, - true as var4, - $json as var5 - ))", - function.data() - ); - NYql::TAstParseResult res = SqlToYql(query); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, 
line.find(R"('('"var4" (Bool '"true")))"), "Cannot find `var4`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" (SafeCast (String '@@{"key": 1238}@@) (DataType 'Json))))"), "Cannot find `var5`"); - }; - - TWordCountHive elementStat({"JsonVariables"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonVariables"] > 0); - } - } - - Y_UNIT_TEST(ValidVariableNames) { - const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; - - for (const auto& function : functions) { - const auto query = Sprintf(R"( - $json = CAST(@@{"key": 1238}@@ as Json); - SELECT %s( - $json, - "strict $.key" - PASSING - "one" as var1, - "two" as "VaR2", - "three" as `var3`, - "four" as VaR4 - ))", - function.data() - ); - NYql::TAstParseResult res = SqlToYql(query); - - UNIT_ASSERT(res.Root); - - TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { - Y_UNUSED(word); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`"); - UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`"); - }; - - TWordCountHive elementStat({"JsonVariables"}); - VerifyProgram(res, elementStat, verifyLine); - UNIT_ASSERT(elementStat["JsonVariables"] > 0); - } - } -} - -Y_UNIT_TEST_SUITE(MigrationToJsonApi) { - Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) { - NYql::TAstParseResult res = SqlToYql(R"( - $json = CAST(@@{"key": 1234}@@ as Json); - SELECT Json::Parse($json); - )"); - - UNIT_ASSERT(res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:26: Warning: Json UDF is deprecated. 
Please use JSON API instead, code: 4506\n"); - } + +Y_UNIT_TEST_SUITE(JsonPassing) { + Y_UNIT_TEST(SupportedVariableTypes) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "string" as var1, + 1.234 as var2, + CAST(1 as Int64) as var3, + true as var4, + $json as var5 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var4" (Bool '"true")))"), "Cannot find `var4`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" (SafeCast (String '@@{"key": 1238}@@) (DataType 'Json))))"), "Cannot find `var5`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } + + Y_UNIT_TEST(ValidVariableNames) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "one" as var1, + "two" as "VaR2", + "three" as `var3`, + "four" as VaR4 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, 
const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } } + +Y_UNIT_TEST_SUITE(MigrationToJsonApi) { + Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1234}@@ as Json); + SELECT Json::Parse($json); + )"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:26: Warning: Json UDF is deprecated. Please use JSON API instead, code: 4506\n"); + } +} Y_UNIT_TEST_SUITE(AnsiIdentsNegative) { Y_UNIT_TEST(EnableAnsiLexerFromRequestSpecialComments) { diff --git a/ydb/library/yql/udfs/common/json2/as_json_node.h b/ydb/library/yql/udfs/common/json2/as_json_node.h index 442e4a90ba..7d8a625b32 100644 --- a/ydb/library/yql/udfs/common/json2/as_json_node.h +++ b/ydb/library/yql/udfs/common/json2/as_json_node.h @@ -1,113 +1,113 @@ -#pragma once - -#include "resource.h" - +#pragma once + +#include "resource.h" + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/minikql/dom/node.h> #include <ydb/library/yql/minikql/dom/json.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - - template <typename TSource> - class TAsJsonNode: public TBoxedValue { - public: - TAsJsonNode(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); - - static bool 
DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); - auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); - builder.Args() - ->Add(optionalSourceType) - .Done() - .Returns(resourceType); - - if (!typesOnly) { - builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); - } - return true; - } - - private: - const size_t MaxParseErrors = 10; - - static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return MakeEntity(); - } - return Interpret(args[0], valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; - - template <> - TStringRef TAsJsonNode<TUtf8>::Name() { - return TStringRef::Of("Utf8AsJsonNode"); - } - - template <> - TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return MakeString(sourceValue.AsStringRef(), valueBuilder); - } - - template <> - TStringRef TAsJsonNode<double>::Name() { - return TStringRef::Of("DoubleAsJsonNode"); - } - - template <> - TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return MakeDouble(sourceValue.Get<double>()); - } - - template <> - TStringRef TAsJsonNode<bool>::Name() { - return TStringRef::Of("BoolAsJsonNode"); - } - - template <> - TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return 
MakeBool(sourceValue.Get<bool>()); - } - - template <> - TStringRef TAsJsonNode<TJson>::Name() { - return TStringRef::Of("JsonAsJsonNode"); - } - - template <> - TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); - } + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + + template <typename TSource> + class TAsJsonNode: public TBoxedValue { + public: + TAsJsonNode(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); + auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); + builder.Args() + ->Add(optionalSourceType) + .Done() + .Returns(resourceType); + + if (!typesOnly) { + builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); + } + return true; + } + + private: + const size_t MaxParseErrors = 10; + + static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return MakeEntity(); + } + return Interpret(args[0], valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + TStringRef TAsJsonNode<TUtf8>::Name() { + return TStringRef::Of("Utf8AsJsonNode"); + } + + template <> + TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return 
MakeString(sourceValue.AsStringRef(), valueBuilder); + } + + template <> + TStringRef TAsJsonNode<double>::Name() { + return TStringRef::Of("DoubleAsJsonNode"); + } + + template <> + TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeDouble(sourceValue.Get<double>()); + } + + template <> + TStringRef TAsJsonNode<bool>::Name() { + return TStringRef::Of("BoolAsJsonNode"); + } + + template <> + TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeBool(sourceValue.Get<bool>()); + } + + template <> + TStringRef TAsJsonNode<TJson>::Name() { + return TStringRef::Of("JsonAsJsonNode"); + } + + template <> + TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); + } } diff --git a/ydb/library/yql/udfs/common/json2/compile_path.h b/ydb/library/yql/udfs/common/json2/compile_path.h index 31edf265a8..cea9f1a8bb 100644 --- a/ydb/library/yql/udfs/common/json2/compile_path.h +++ b/ydb/library/yql/udfs/common/json2/compile_path.h @@ -1,70 +1,70 @@ -#pragma once - -#include "resource.h" - +#pragma once + +#include "resource.h" + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - - class TCompilePath: public TBoxedValue { - public: - TCompilePath(TSourcePosition pos) - : Pos_(pos) - { - } - - static const TStringRef& Name() { - static auto name = TStringRef::Of("CompilePath"); - return name; - } - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - auto 
resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); - builder.Args() - ->Add<NUdf::TUtf8>() - .Done() - .Returns(resourceType); - - if (!typesOnly) { - builder.Implementation(new TCompilePath(builder.GetSourcePosition())); - } - return true; - } - - private: - const size_t MaxParseErrors = 10; - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - TIssues issues; - const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors); - if (!issues.Empty()) { - ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); - } - - return TUnboxedValuePod(new TJsonPathResource(jsonPath)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + + class TCompilePath: public TBoxedValue { + public: + TCompilePath(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("CompilePath"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); + builder.Args() + ->Add<NUdf::TUtf8>() + .Done() + .Returns(resourceType); + + if (!typesOnly) { + builder.Implementation(new TCompilePath(builder.GetSourcePosition())); + } + return true; + } + + private: + const size_t MaxParseErrors = 10; + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + TIssues issues; + const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors); + if (!issues.Empty()) { + ythrow 
yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); + } + + return TUnboxedValuePod(new TJsonPathResource(jsonPath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; } diff --git a/ydb/library/yql/udfs/common/json2/json2_udf.cpp b/ydb/library/yql/udfs/common/json2/json2_udf.cpp index 761db67f64..a69e9c14b7 100644 --- a/ydb/library/yql/udfs/common/json2/json2_udf.cpp +++ b/ydb/library/yql/udfs/common/json2/json2_udf.cpp @@ -1,43 +1,43 @@ -#include "as_json_node.h" -#include "compile_path.h" -#include "parse.h" -#include "serialize.h" -#include "sql_exists.h" -#include "sql_query.h" -#include "sql_value.h" - +#include "as_json_node.h" +#include "compile_path.h" +#include "parse.h" +#include "serialize.h" +#include "sql_exists.h" +#include "sql_query.h" +#include "sql_value.h" + #include <ydb/library/yql/public/udf/udf_helpers.h> - -namespace NJson2Udf { - SIMPLE_MODULE(TJson2Module, - TParse, - TSerialize<EDataSlot::Json>, - TSerialize<EDataSlot::JsonDocument>, - TCompilePath, - TSqlValue<EDataSlot::Json, TUtf8>, - TSqlValue<EDataSlot::Json, TUtf8, true>, - TSqlValue<EDataSlot::Json, i64>, - TSqlValue<EDataSlot::Json, double>, - TSqlValue<EDataSlot::Json, bool>, - TSqlValue<EDataSlot::JsonDocument, TUtf8>, - TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, - TSqlValue<EDataSlot::JsonDocument, i64>, - TSqlValue<EDataSlot::JsonDocument, double>, - TSqlValue<EDataSlot::JsonDocument, bool>, - TSqlExists<EDataSlot::Json, false>, - TSqlExists<EDataSlot::Json, true>, - TSqlExists<EDataSlot::JsonDocument, false>, - TSqlExists<EDataSlot::JsonDocument, true>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, - 
TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, - TAsJsonNode<TUtf8>, - TAsJsonNode<double>, - TAsJsonNode<bool>, - TAsJsonNode<TJson>) -} - -REGISTER_MODULES(NJson2Udf::TJson2Module) + +namespace NJson2Udf { + SIMPLE_MODULE(TJson2Module, + TParse, + TSerialize<EDataSlot::Json>, + TSerialize<EDataSlot::JsonDocument>, + TCompilePath, + TSqlValue<EDataSlot::Json, TUtf8>, + TSqlValue<EDataSlot::Json, TUtf8, true>, + TSqlValue<EDataSlot::Json, i64>, + TSqlValue<EDataSlot::Json, double>, + TSqlValue<EDataSlot::Json, bool>, + TSqlValue<EDataSlot::JsonDocument, TUtf8>, + TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, + TSqlValue<EDataSlot::JsonDocument, i64>, + TSqlValue<EDataSlot::JsonDocument, double>, + TSqlValue<EDataSlot::JsonDocument, bool>, + TSqlExists<EDataSlot::Json, false>, + TSqlExists<EDataSlot::Json, true>, + TSqlExists<EDataSlot::JsonDocument, false>, + TSqlExists<EDataSlot::JsonDocument, true>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, + TAsJsonNode<TUtf8>, + TAsJsonNode<double>, + TAsJsonNode<bool>, + TAsJsonNode<TJson>) +} + +REGISTER_MODULES(NJson2Udf::TJson2Module) diff --git a/ydb/library/yql/udfs/common/json2/parse.h b/ydb/library/yql/udfs/common/json2/parse.h index 6139e724e2..01a6bb7676 100644 --- a/ydb/library/yql/udfs/common/json2/parse.h +++ b/ydb/library/yql/udfs/common/json2/parse.h @@ -1,66 +1,66 @@ -#pragma once - -#include "resource.h" - +#pragma once + +#include "resource.h" + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/minikql/dom/json.h> - + #include <library/cpp/json/json_reader.h> - -namespace NJson2Udf { - using 
namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - - class TParse: public TBoxedValue { - public: - TParse(TSourcePosition pos) - : Pos_(pos) - { - } - - static const TStringRef& Name() { - static auto name = TStringRef::Of("Parse"); - return name; - } - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - builder.Args() - ->Add<TAutoMap<TJson>>() - .Done() - .Returns<TJsonNodeResource>(); - - if (!typesOnly) { - builder.Implementation(new TParse(builder.GetSourcePosition())); - } - return true; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - const auto json = args[0].AsStringRef(); - return TryParseJsonDom(json, valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + + class TParse: public TBoxedValue { + public: + TParse(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Parse"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + builder.Args() + ->Add<TAutoMap<TJson>>() + .Done() + .Returns<TJsonNodeResource>(); + + if (!typesOnly) { + builder.Implementation(new TParse(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + const auto json = 
args[0].AsStringRef(); + return TryParseJsonDom(json, valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; } diff --git a/ydb/library/yql/udfs/common/json2/resource.h b/ydb/library/yql/udfs/common/json2/resource.h index 912fd43a67..43b79fe710 100644 --- a/ydb/library/yql/udfs/common/json2/resource.h +++ b/ydb/library/yql/udfs/common/json2/resource.h @@ -1,17 +1,17 @@ -#pragma once - +#pragma once + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/minikql/jsonpath/jsonpath.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - - extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; - using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; - - extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; - using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + + extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; + using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; + + extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; + using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; } diff --git a/ydb/library/yql/udfs/common/json2/serialize.h b/ydb/library/yql/udfs/common/json2/serialize.h index 4942ff6d1e..f075cce185 100644 --- a/ydb/library/yql/udfs/common/json2/serialize.h +++ b/ydb/library/yql/udfs/common/json2/serialize.h @@ -1,89 +1,89 @@ -#pragma once - -#include "resource.h" - +#pragma once + +#include "resource.h" + #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/minikql/dom/json.h> - + #include <ydb/library/binary_json/write.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - 
using namespace NYql; - using namespace NDom; - using namespace NBinaryJson; - - template <EDataSlot ResultType> - class TSerialize : public TBoxedValue { - public: - TSerialize(TSourcePosition pos) - : Pos_(pos) - { - } - - static const TStringRef& Name(); - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - TType* resultType = nullptr; - if constexpr (ResultType == EDataSlot::Json) { - resultType = builder.SimpleType<TJson>(); - } else { - resultType = builder.SimpleType<TJsonDocument>(); - } - - builder.Args() - ->Add<TAutoMap<TJsonNodeResource>>() - .Done() - .Returns(resultType); - - if (!typesOnly) { - builder.Implementation(new TSerialize(builder.GetSourcePosition())); - } - return true; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - const TUnboxedValue& jsonDom = args[0]; - - if constexpr (ResultType == EDataSlot::Json) { - return valueBuilder->NewString(SerializeJsonDom(jsonDom)); - } else { - const auto binaryJson = SerializeToBinaryJson(jsonDom); - return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); - } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; - - template <> - const TStringRef& TSerialize<EDataSlot::Json>::Name() { - static auto name = TStringRef::Of("Serialize"); - return name; - } - - template <> - const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { - static auto name = TStringRef::Of("SerializeToJsonDocument"); - return name; - } + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NBinaryJson; + + template <EDataSlot ResultType> + class TSerialize : public TBoxedValue { + public: + 
TSerialize(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + TType* resultType = nullptr; + if constexpr (ResultType == EDataSlot::Json) { + resultType = builder.SimpleType<TJson>(); + } else { + resultType = builder.SimpleType<TJsonDocument>(); + } + + builder.Args() + ->Add<TAutoMap<TJsonNodeResource>>() + .Done() + .Returns(resultType); + + if (!typesOnly) { + builder.Implementation(new TSerialize(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + const TUnboxedValue& jsonDom = args[0]; + + if constexpr (ResultType == EDataSlot::Json) { + return valueBuilder->NewString(SerializeJsonDom(jsonDom)); + } else { + const auto binaryJson = SerializeToBinaryJson(jsonDom); + return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + const TStringRef& TSerialize<EDataSlot::Json>::Name() { + static auto name = TStringRef::Of("Serialize"); + return name; + } + + template <> + const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { + static auto name = TStringRef::Of("SerializeToJsonDocument"); + return name; + } } diff --git a/ydb/library/yql/udfs/common/json2/sql_exists.h b/ydb/library/yql/udfs/common/json2/sql_exists.h index 32d5b84897..e1eb1c75fc 100644 --- a/ydb/library/yql/udfs/common/json2/sql_exists.h +++ b/ydb/library/yql/udfs/common/json2/sql_exists.h @@ -1,132 +1,132 @@ -#pragma once - -#include "resource.h" -#include "compile_path.h" - +#pragma once + +#include "resource.h" +#include "compile_path.h" + 
#include <ydb/library/yql/public/udf/udf_type_builder.h> #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> - -#include <util/generic/yexception.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NJsonPath; - - template <EDataSlot InputType, bool ThrowException> - class TSqlExists: public TBoxedValue { - public: - explicit TSqlExists(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - TType* inputType = nullptr; - if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); - } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - auto optionalBoolType = builder.Optional()->Item<bool>().Build(); - - if constexpr (ThrowException) { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Done() - .Returns(optionalBoolType); - } else { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add(optionalBoolType) - .Done() - .Returns(optionalBoolType); - } - - if (!typesOnly) { - builder.Implementation(new TSqlExists(builder.GetSourcePosition())); - } - return true; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } - - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = 
TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto variables = DictToVariables(args[2]); - - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (result.IsError()) { - if constexpr (ThrowException) { - ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; - } else { - return args[3]; - } - } - - return TUnboxedValuePod(!result.GetNodes().empty()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; - - template <> - TStringRef TSqlExists<EDataSlot::Json, false>::Name() { - return "SqlExists"; - } - - template <> - TStringRef TSqlExists<EDataSlot::Json, true>::Name() { - return "SqlTryExists"; - } - - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { - return "JsonDocumentSqlExists"; - } - - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { - return "JsonDocumentSqlTryExists"; - } + +#include <util/generic/yexception.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NJsonPath; + + template <EDataSlot InputType, bool ThrowException> + class TSqlExists: public TBoxedValue { + public: + explicit TSqlExists(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = 
builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + auto optionalBoolType = builder.Optional()->Item<bool>().Build(); + + if constexpr (ThrowException) { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(optionalBoolType); + } else { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add(optionalBoolType) + .Done() + .Returns(optionalBoolType); + } + + if (!typesOnly) { + builder.Implementation(new TSqlExists(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); + + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + if (result.IsError()) { + if constexpr (ThrowException) { + ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; + } else { + return args[3]; + } + } + + return TUnboxedValuePod(!result.GetNodes().empty()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + TStringRef TSqlExists<EDataSlot::Json, false>::Name() { + return "SqlExists"; + } + + template 
<> + TStringRef TSqlExists<EDataSlot::Json, true>::Name() { + return "SqlTryExists"; + } + + template <> + TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { + return "JsonDocumentSqlExists"; + } + + template <> + TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { + return "JsonDocumentSqlTryExists"; + } } diff --git a/ydb/library/yql/udfs/common/json2/sql_query.h b/ydb/library/yql/udfs/common/json2/sql_query.h index 85ed288fec..f681c562a4 100644 --- a/ydb/library/yql/udfs/common/json2/sql_query.h +++ b/ydb/library/yql/udfs/common/json2/sql_query.h @@ -1,182 +1,182 @@ -#pragma once - -#include "resource.h" -#include "compile_path.h" - +#pragma once + +#include "resource.h" +#include "compile_path.h" + #include <ydb/library/yql/core/yql_atom_enums.h> #include <ydb/library/yql/public/udf/udf_type_builder.h> #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/minikql/dom/node.h> - -#include <util/generic/yexception.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; - - template <EDataSlot InputType, EJsonQueryWrap Mode> - class TSqlQuery: public TBoxedValue { - public: - explicit TSqlQuery(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - TType* inputType = nullptr; - if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); - } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto jsonPathType = 
builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - - /* - Arguments: - 0. Resource<JsonNode>? or JsonDocument?. Input json - 1. Resource<JsonPath>. Jsonpath to execute on json - 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath - 3. Bool. True - throw on empty result, false otherwise - 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true - 5. Bool. True - throw on error, false - otherwise - 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true - */ - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add<bool>() - .Add(optionalJsonType) - .Add<bool>() - .Add(optionalJsonType) - .Done() - .Returns(optionalJsonType); - - if (!typesOnly) { - builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); - } - return true; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } - - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - - const bool throwOnEmpty = args[3].Get<bool>(); - const auto emptyDefault = args[4]; - const bool throwOnError = args[5].Get<bool>(); - const auto errorDefault = args[6]; - const auto variables = DictToVariables(args[2]); - - auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - - const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { - if (throws) { - ythrow yexception() << message; - } - return caseDefault; - }; - - if 
(result.IsError()) { - return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); - } - - auto& nodes = result.GetNodes(); - const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); - if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { - TVector<TUnboxedValue> converted; - converted.reserve(nodes.size()); - for (auto& node : nodes) { - converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); - } - return MakeList(converted.data(), converted.size(), valueBuilder); - } - - if (nodes.empty()) { - return handleCase("Empty result", throwOnEmpty, emptyDefault); - } - - // No wrapping is applicable and result is not empty. Result must be a single object or array - if (nodes.size() > 1) { - return handleCase("Result consists of multiple items", throwOnError, errorDefault); - } - - if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { - return handleCase("Result is neither object nor array", throwOnError, errorDefault); - } - - return nodes[0].ConvertToUnboxedValue(valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); - } - } - - TSourcePosition Pos_; - }; - - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { - return "SqlQuery"; - } - - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { - return "SqlQueryWrap"; - } - - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { - return "SqlQueryConditionalWrap"; - } - - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { - return "JsonDocumentSqlQuery"; - } - - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { - return "JsonDocumentSqlQueryWrap"; - } - - template 
<> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { - return "JsonDocumentSqlQueryConditionalWrap"; - } + +#include <util/generic/yexception.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NJsonPath; + + template <EDataSlot InputType, EJsonQueryWrap Mode> + class TSqlQuery: public TBoxedValue { + public: + explicit TSqlQuery(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + + /* + Arguments: + 0. Resource<JsonNode>? or JsonDocument?. Input json + 1. Resource<JsonPath>. Jsonpath to execute on json + 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath + 3. Bool. True - throw on empty result, false otherwise + 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true + 5. Bool. True - throw on error, false - otherwise + 6. Resource<JsonNode>?. Default value to return on error. 
Ignored if 4th argument is true + */ + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add<bool>() + .Add(optionalJsonType) + .Add<bool>() + .Add(optionalJsonType) + .Done() + .Returns(optionalJsonType); + + if (!typesOnly) { + builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + + const bool throwOnEmpty = args[3].Get<bool>(); + const auto emptyDefault = args[4]; + const bool throwOnError = args[5].Get<bool>(); + const auto errorDefault = args[6]; + const auto variables = DictToVariables(args[2]); + + auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + + const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { + if (throws) { + ythrow yexception() << message; + } + return caseDefault; + }; + + if (result.IsError()) { + return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); + } + + auto& nodes = result.GetNodes(); + const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); + if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { + TVector<TUnboxedValue> converted; + converted.reserve(nodes.size()); + for (auto& node : nodes) { + 
converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); + } + return MakeList(converted.data(), converted.size(), valueBuilder); + } + + if (nodes.empty()) { + return handleCase("Empty result", throwOnEmpty, emptyDefault); + } + + // No wrapping is applicable and result is not empty. Result must be a single object or array + if (nodes.size() > 1) { + return handleCase("Result consists of multiple items", throwOnError, errorDefault); + } + + if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { + return handleCase("Result is neither object nor array", throwOnError, errorDefault); + } + + return nodes[0].ConvertToUnboxedValue(valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { + return "SqlQuery"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { + return "SqlQueryWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { + return "SqlQueryConditionalWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { + return "JsonDocumentSqlQuery"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { + return "JsonDocumentSqlQueryWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { + return "JsonDocumentSqlQueryConditionalWrap"; + } } diff --git a/ydb/library/yql/udfs/common/json2/sql_value.h b/ydb/library/yql/udfs/common/json2/sql_value.h index 5960c0749e..dd9613285d 100644 --- a/ydb/library/yql/udfs/common/json2/sql_value.h +++ b/ydb/library/yql/udfs/common/json2/sql_value.h @@ -1,294 +1,294 @@ -#pragma once - -#include "resource.h" -#include "compile_path.h" - +#pragma once + +#include "resource.h" 
+#include "compile_path.h" + #include <ydb/library/yql/public/udf/udf_type_builder.h> #include <ydb/library/yql/public/udf/udf_value.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/minikql/dom/node.h> - + #include <ydb/library/binary_json/read.h> - -#include <util/generic/yexception.h> -#include <util/generic/ylimits.h> + +#include <util/generic/yexception.h> +#include <util/generic/ylimits.h> #include <util/string/cast.h> - -namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; - - namespace { - template <class TValueType, bool ForceConvert = false> - TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - Y_UNUSED(source); - Y_FAIL("Unsupported type"); - } - - template <> - TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (IsNodeType(source, ENodeType::String)) { - return source; - } - return {}; - } - - template <> - TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - switch (GetNodeType(source)) { - case ENodeType::String: - return source; - case ENodeType::Uint64: - return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); - case ENodeType::Int64: - return valueBuilder->NewString(ToString(source.Get<i64>())).Release(); - case ENodeType::Bool: - return source.Get<bool>() ? 
TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); - case ENodeType::Double: - return valueBuilder->NewString(ToString(source.Get<double>())).Release(); - case ENodeType::Entity: - return TUnboxedValuePod::Embedded("null"); - case ENodeType::List: - case ENodeType::Dict: - case ENodeType::Attr: - return {}; - } - } - - template <> - TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } - - if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(source.Get<i64>()); - } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); - } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); - } - - return {}; - } - - template <> - TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } - - if (IsNodeType(source, ENodeType::Double)) { - return TUnboxedValuePod(source.Get<double>()); - } else if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); - } else if (IsNodeType(source, ENodeType::Uint64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); - } - - return {}; - } - - template <> - TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { - return {}; - } - return {TUnboxedValuePod(source.Get<bool>())}; - } - } - - template <EDataSlot InputType, class TValueType, bool ForceConvert = false> - class TSqlValue: public TBoxedValue { - public: - enum class TErrorCode : ui8 { - 
Empty = 0, - Error = 1 - }; - - TSqlValue(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } - - auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); - auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); - auto returnTypeTuple = builder.Tuple(2) - ->Add(errorTupleType) - .Add(optionalValueType) - .Build(); - auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); - - TType* jsonType = nullptr; - if constexpr (InputType == EDataSlot::Json) { - jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - } else { - jsonType = builder.SimpleType<TJsonDocument>(); - } - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); - - builder.Args() - ->Add(optionalJsonType) - .Add(jsonPathType) - .Add(dictType) - .Done() - .Returns(returnType); - - if (!typesOnly) { - builder.Implementation(new TSqlValue(builder.GetSourcePosition())); - } - return true; - } - - private: - TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { - TUnboxedValue* items = nullptr; - auto errorTuple = valueBuilder->NewArray(2, items); - items[0] = TUnboxedValuePod(static_cast<ui8>(code)); - items[1] = valueBuilder->NewString(message); - return valueBuilder->NewVariant(0, std::move(errorTuple)); - } - - TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { - return valueBuilder->NewVariant(1, std::move(value)); - } - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - if 
(!args[0].HasValue()) { - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } - - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto variables = DictToVariables(args[2]); - - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - - if (result.IsError()) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); - } - - const auto& nodes = result.GetNodes(); - if (nodes.empty()) { - return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); - } - - if (nodes.size() > 1) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); - } - - const auto& value = nodes[0]; - if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { - // SqlValue can return only scalar values - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); - } - - if (value.Is(EValueType::Null)) { - // JSON nulls must be converted to SQL nulls - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } - - const auto source = value.ConvertToUnboxedValue(valueBuilder); - TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); - if (!convertedValue) { - // error while converting JSON value type to TValueType - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); - } - - return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << 
e.what()).data()); - } - } - - TSourcePosition Pos_; - }; - - template <EDataSlot InputType, class TValueType, bool ForceConvert> - TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { - Y_FAIL("Unknown name"); - } - - template<> - TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { - return TStringRef::Of("SqlValueConvertToUtf8"); - } - - template <> - TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { - return TStringRef::Of("SqlValueUtf8"); - } - - template <> - TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { - return TStringRef::Of("SqlValueInt64"); - } - - template <> - TStringRef TSqlValue<EDataSlot::Json, double>::Name() { - return TStringRef::Of("SqlValueNumber"); - } - - template <> - TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { - return TStringRef::Of("SqlValueBool"); - } - - template<> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { - return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); - } - - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { - return TStringRef::Of("JsonDocumentSqlValueUtf8"); - } - - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { - return TStringRef::Of("JsonDocumentSqlValueInt64"); - } - - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { - return TStringRef::Of("JsonDocumentSqlValueNumber"); - } - - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { - return TStringRef::Of("JsonDocumentSqlValueBool"); - } - + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NJsonPath; + + namespace { + template <class TValueType, bool ForceConvert = false> + TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + Y_UNUSED(source); + Y_FAIL("Unsupported type"); + } + + template <> + TUnboxedValue 
TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (IsNodeType(source, ENodeType::String)) { + return source; + } + return {}; + } + + template <> + TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + switch (GetNodeType(source)) { + case ENodeType::String: + return source; + case ENodeType::Uint64: + return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); + case ENodeType::Int64: + return valueBuilder->NewString(ToString(source.Get<i64>())).Release(); + case ENodeType::Bool: + return source.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); + case ENodeType::Double: + return valueBuilder->NewString(ToString(source.Get<double>())).Release(); + case ENodeType::Entity: + return TUnboxedValuePod::Embedded("null"); + case ENodeType::List: + case ENodeType::Dict: + case ENodeType::Attr: + return {}; + } + } + + template <> + TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } + + if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(source.Get<i64>()); + } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); + } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); + } + + return {}; + } + + template <> + TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } + + if (IsNodeType(source, ENodeType::Double)) { + return TUnboxedValuePod(source.Get<double>()); + } else if (IsNodeType(source, ENodeType::Int64)) { + return 
TUnboxedValuePod(static_cast<double>(source.Get<i64>())); + } else if (IsNodeType(source, ENodeType::Uint64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); + } + + return {}; + } + + template <> + TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { + return {}; + } + return {TUnboxedValuePod(source.Get<bool>())}; + } + } + + template <EDataSlot InputType, class TValueType, bool ForceConvert = false> + class TSqlValue: public TBoxedValue { + public: + enum class TErrorCode : ui8 { + Empty = 0, + Error = 1 + }; + + TSqlValue(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); + auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); + auto returnTypeTuple = builder.Tuple(2) + ->Add(errorTupleType) + .Add(optionalValueType) + .Build(); + auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); + + TType* jsonType = nullptr; + if constexpr (InputType == EDataSlot::Json) { + jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + } else { + jsonType = builder.SimpleType<TJsonDocument>(); + } + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); + + builder.Args() + ->Add(optionalJsonType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(returnType); + + if (!typesOnly) { + builder.Implementation(new TSqlValue(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue 
BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { + TUnboxedValue* items = nullptr; + auto errorTuple = valueBuilder->NewArray(2, items); + items[0] = TUnboxedValuePod(static_cast<ui8>(code)); + items[1] = valueBuilder->NewString(message); + return valueBuilder->NewVariant(0, std::move(errorTuple)); + } + + TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { + return valueBuilder->NewVariant(1, std::move(value)); + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + if (!args[0].HasValue()) { + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); + + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + + if (result.IsError()) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); + } + + const auto& nodes = result.GetNodes(); + if (nodes.empty()) { + return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); + } + + if (nodes.size() > 1) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); + } + + const auto& value = nodes[0]; + if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { + // SqlValue can return only scalar values + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); + } + + if (value.Is(EValueType::Null)) { + // JSON nulls 
must be converted to SQL nulls + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } + + const auto source = value.ConvertToUnboxedValue(valueBuilder); + TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); + if (!convertedValue) { + // error while converting JSON value type to TValueType + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); + } + + return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <EDataSlot InputType, class TValueType, bool ForceConvert> + TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { + Y_FAIL("Unknown name"); + } + + template<> + TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { + return TStringRef::Of("SqlValueConvertToUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { + return TStringRef::Of("SqlValueUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { + return TStringRef::Of("SqlValueInt64"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, double>::Name() { + return TStringRef::Of("SqlValueNumber"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { + return TStringRef::Of("SqlValueBool"); + } + + template<> + TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { + return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { + return TStringRef::Of("JsonDocumentSqlValueUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { + return TStringRef::Of("JsonDocumentSqlValueInt64"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { + return 
TStringRef::Of("JsonDocumentSqlValueNumber"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { + return TStringRef::Of("JsonDocumentSqlValueBool"); + } + } diff --git a/ydb/library/yql/udfs/common/json2/ya.make b/ydb/library/yql/udfs/common/json2/ya.make index 617d3c9d00..dda2070653 100644 --- a/ydb/library/yql/udfs/common/json2/ya.make +++ b/ydb/library/yql/udfs/common/json2/ya.make @@ -1,25 +1,25 @@ -YQL_UDF(json2_udf) - -YQL_ABI_VERSION( - 2 +YQL_UDF(json2_udf) + +YQL_ABI_VERSION( + 2 21 - 0 -) - + 0 +) + OWNER( g:kikimr g:yql g:yql_ydb_core ) - -SRCS( - json2_udf.cpp -) - -PEERDIR( + +SRCS( + json2_udf.cpp +) + +PEERDIR( ydb/library/binary_json ydb/library/yql/minikql/dom ydb/library/yql/minikql/jsonpath -) - -END() +) + +END() diff --git a/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp b/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp index 1278ec565c..193daa758d 100644 --- a/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp +++ b/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp @@ -7,13 +7,13 @@ #include <ydb/library/yql/minikql/dom/convert.h> #include <ydb/library/yql/public/udf/udf_helpers.h> #include <ydb/library/yql/public/udf/udf_type_printer.h> - + #include <library/cpp/yson_pull/exceptions.h> #include <util/string/split.h> using namespace NYql::NUdf; -using namespace NYql::NDom; +using namespace NYql::NDom; using namespace NYsonPull; namespace { @@ -96,109 +96,109 @@ public: }; using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos); - -template <TConverterPtr Converter> + +template <TConverterPtr Converter> class TLazyConveterT : public TManagedBoxedValue { -public: +public: TLazyConveterT(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) : Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos) {} private: - template <bool NoSwap> - class TIterator: public TManagedBoxedValue { - public: + template <bool NoSwap> + 
class TIterator: public TManagedBoxedValue { + public: TIterator(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) : Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos) - {} - - private: - bool Skip() final { - return Original.Skip(); - } - - bool Next(TUnboxedValue& value) final { - if (Original.Next(value)) { - if constexpr (!NoSwap) { + {} + + private: + bool Skip() final { + return Original.Skip(); + } + + bool Next(TUnboxedValue& value) final { + if (Original.Next(value)) { + if constexpr (!NoSwap) { value = Converter(value.Release(), ValueBuilder, Pos_); - } - return true; - } - return false; - } - - bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final { - if (Original.NextPair(key, payload)) { - if constexpr (NoSwap) { + } + return true; + } + return false; + } + + bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final { + if (Original.NextPair(key, payload)) { + if constexpr (NoSwap) { payload = Converter(payload.Release(), ValueBuilder, Pos_); - } else { + } else { key = Converter(key.Release(), ValueBuilder, Pos_); - } - return true; - } - return false; - } - - const TUnboxedValue Original; + } + return true; + } + return false; + } + + const TUnboxedValue Original; const IValueBuilder *const ValueBuilder; const TSourcePosition Pos_; - }; - - ui64 GetDictLength() const final { - return Original.GetDictLength(); - } - - ui64 GetListLength() const final { - return Original.GetListLength(); - } - - bool HasFastListLength() const final { - return Original.HasFastListLength(); - } - - bool HasDictItems() const final { - return Original.HasDictItems(); - } - - bool HasListItems() const final { - return Original.HasListItems(); - } - - TUnboxedValue GetListIterator() const final { + }; + + ui64 GetDictLength() const final { + return Original.GetDictLength(); + } + + ui64 GetListLength() const final { + return Original.GetListLength(); + } + + bool HasFastListLength() const final { + 
return Original.HasFastListLength(); + } + + bool HasDictItems() const final { + return Original.HasDictItems(); + } + + bool HasListItems() const final { + return Original.HasListItems(); + } + + TUnboxedValue GetListIterator() const final { return TUnboxedValuePod(new TIterator<false>(Original.GetListIterator(), ValueBuilder, Pos_)); - } - - TUnboxedValue GetDictIterator() const final { + } + + TUnboxedValue GetDictIterator() const final { return TUnboxedValuePod(new TIterator<true>(Original.GetDictIterator(), ValueBuilder, Pos_)); - } - - TUnboxedValue GetKeysIterator() const final { + } + + TUnboxedValue GetKeysIterator() const final { return TUnboxedValuePod(new TIterator<true>(Original.GetKeysIterator(), ValueBuilder, Pos_)); - } - + } + TUnboxedValue GetPayloadsIterator() const override { return TUnboxedValuePod(new TIterator<false>(Original.GetPayloadsIterator(), ValueBuilder, Pos_)); - } - - bool Contains(const TUnboxedValuePod& key) const final { - return Original.Contains(key); - } - - TUnboxedValue Lookup(const TUnboxedValuePod& key) const final { - if (auto lookup = Original.Lookup(key)) { + } + + bool Contains(const TUnboxedValuePod& key) const final { + return Original.Contains(key); + } + + TUnboxedValue Lookup(const TUnboxedValuePod& key) const final { + if (auto lookup = Original.Lookup(key)) { return Converter(lookup.Release().GetOptionalValue(), ValueBuilder, Pos_).MakeOptional(); - } - return {}; - } - - bool IsSortedDict() const final { - return Original.IsSortedDict(); - } - - const TUnboxedValue Original; + } + return {}; + } + + bool IsSortedDict() const final { + return Original.IsSortedDict(); + } + + const TUnboxedValue Original; const IValueBuilder *const ValueBuilder; const TSourcePosition Pos_; -}; - +}; + template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { diff --git 
a/ydb/public/api/protos/ydb_value.proto b/ydb/public/api/protos/ydb_value.proto index d885d73f8e..d9aca4c878 100644 --- a/ydb/public/api/protos/ydb_value.proto +++ b/ydb/public/api/protos/ydb_value.proto @@ -76,8 +76,8 @@ message Type { YSON = 0x1201; JSON = 0x1202; UUID = 0x1203; - JSON_DOCUMENT = 0x1204; - DYNUMBER = 0x1302; + JSON_DOCUMENT = 0x1204; + DYNUMBER = 0x1302; } oneof type { diff --git a/ydb/public/lib/json_value/ydb_json_value.cpp b/ydb/public/lib/json_value/ydb_json_value.cpp index af126501a7..71d265cdcc 100644 --- a/ydb/public/lib/json_value/ydb_json_value.cpp +++ b/ydb/public/lib/json_value/ydb_json_value.cpp @@ -189,12 +189,12 @@ namespace NYdb { case EPrimitiveType::Json: Writer.WriteString(Parser.GetJson()); break; - case EPrimitiveType::JsonDocument: + case EPrimitiveType::JsonDocument: Writer.WriteString(Parser.GetJsonDocument()); - break; - case EPrimitiveType::DyNumber: + break; + case EPrimitiveType::DyNumber: Writer.WriteString(Parser.GetDyNumber()); - break; + break; default: ThrowFatalError(TStringBuilder() << "Unsupported primitive type: " << type); } diff --git a/ydb/public/lib/scheme_types/scheme_type_id.h b/ydb/public/lib/scheme_types/scheme_type_id.h index 22fada2a7d..ac76ee33c6 100644 --- a/ydb/public/lib/scheme_types/scheme_type_id.h +++ b/ydb/public/lib/scheme_types/scheme_type_id.h @@ -44,10 +44,10 @@ static constexpr TTypeId Utf8 = NYql::NProto::Utf8; static constexpr TTypeId Yson = NYql::NProto::Yson; static constexpr TTypeId Json = NYql::NProto::Json; -static constexpr TTypeId JsonDocument = NYql::NProto::JsonDocument; - -static constexpr TTypeId DyNumber = NYql::NProto::DyNumber; - +static constexpr TTypeId JsonDocument = NYql::NProto::JsonDocument; + +static constexpr TTypeId DyNumber = NYql::NProto::DyNumber; + static constexpr TTypeId Decimal = NYql::NProto::Decimal; static constexpr TTypeId YqlIds[] = { @@ -67,9 +67,9 @@ static constexpr TTypeId YqlIds[] = { Date, Datetime, Timestamp, - Interval, - JsonDocument, - 
DyNumber, + Interval, + JsonDocument, + DyNumber, }; // types must be defined in GetValueHash and CompareTypedCells @@ -114,9 +114,9 @@ const char *TypeName(TTypeId typeId) { case NTypeIds::Utf8: return "Utf8"; case NTypeIds::Yson: return "Yson"; case NTypeIds::Json: return "Json"; - case NTypeIds::JsonDocument: return "JsonDocument"; + case NTypeIds::JsonDocument: return "JsonDocument"; case NTypeIds::Decimal: return "Decimal"; - case NTypeIds::DyNumber: return "DyNumber"; + case NTypeIds::DyNumber: return "DyNumber"; default: return "Unknown"; } } diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_table.cpp b/ydb/public/lib/ydb_cli/commands/ydb_service_table.cpp index 2f94b2fc03..0451c346cc 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_table.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_table.cpp @@ -105,9 +105,9 @@ namespace { {"Utf8", EPrimitiveType::Utf8}, {"Yson", EPrimitiveType::Yson}, {"Json", EPrimitiveType::Json}, - {"Uuid", EPrimitiveType::Uuid}, - {"JsonDocument", EPrimitiveType::JsonDocument}, - {"DyNumber", EPrimitiveType::DyNumber}, + {"Uuid", EPrimitiveType::Uuid}, + {"JsonDocument", EPrimitiveType::JsonDocument}, + {"DyNumber", EPrimitiveType::DyNumber}, }; TString GetAllTypesString() { diff --git a/ydb/public/lib/yson_value/ydb_yson_value.cpp b/ydb/public/lib/yson_value/ydb_yson_value.cpp index c855acf0a7..615d157dba 100644 --- a/ydb/public/lib/yson_value/ydb_yson_value.cpp +++ b/ydb/public/lib/yson_value/ydb_yson_value.cpp @@ -76,12 +76,12 @@ static void PrimitiveValueToYson(EPrimitiveType type, TValueParser& parser, NYso case EPrimitiveType::Json: writer.OnStringScalar(parser.GetJson()); break; - case EPrimitiveType::JsonDocument: - writer.OnStringScalar(parser.GetJsonDocument()); - break; - case EPrimitiveType::DyNumber: - writer.OnStringScalar(parser.GetDyNumber()); - break; + case EPrimitiveType::JsonDocument: + writer.OnStringScalar(parser.GetJsonDocument()); + break; + case EPrimitiveType::DyNumber: + 
writer.OnStringScalar(parser.GetDyNumber()); + break; default: ThrowFatalError(TStringBuilder() << "Unsupported primitive type: " << type); } diff --git a/ydb/public/sdk/cpp/client/ydb_value/value.cpp b/ydb/public/sdk/cpp/client/ydb_value/value.cpp index 8c18d4d50f..27b63b7408 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/value.cpp +++ b/ydb/public/sdk/cpp/client/ydb_value/value.cpp @@ -1071,16 +1071,16 @@ public: return GetProto().text_value(); } - const TString& GetJsonDocument() const { - CheckPrimitive(NYdb::EPrimitiveType::JsonDocument); - return GetProto().text_value(); - } - - const TString& GetDyNumber() const { - CheckPrimitive(NYdb::EPrimitiveType::DyNumber); - return GetProto().text_value(); - } - + const TString& GetJsonDocument() const { + CheckPrimitive(NYdb::EPrimitiveType::JsonDocument); + return GetProto().text_value(); + } + + const TString& GetDyNumber() const { + CheckPrimitive(NYdb::EPrimitiveType::DyNumber); + return GetProto().text_value(); + } + TDecimalValue GetDecimal() const { CheckDecimal(); return TDecimalValue(GetProto(), TypeParser_.GetDecimal()); @@ -1415,8 +1415,8 @@ private: case NYdb::EPrimitiveType::Yson: return Ydb::Value::kBytesValue; case NYdb::EPrimitiveType::Json: - case NYdb::EPrimitiveType::JsonDocument: - case NYdb::EPrimitiveType::DyNumber: + case NYdb::EPrimitiveType::JsonDocument: + case NYdb::EPrimitiveType::DyNumber: return Ydb::Value::kTextValue; default: FatalError(TStringBuilder() << "Unexpected primitive type: " << primitiveTypeId); @@ -1547,14 +1547,14 @@ const TString& TValueParser::GetJson() const { return Impl_->GetJson(); } -const TString& TValueParser::GetJsonDocument() const { - return Impl_->GetJsonDocument(); -} - -const TString& TValueParser::GetDyNumber() const { - return Impl_->GetDyNumber(); -} - +const TString& TValueParser::GetJsonDocument() const { + return Impl_->GetJsonDocument(); +} + +const TString& TValueParser::GetDyNumber() const { + return Impl_->GetDyNumber(); +} + TDecimalValue 
TValueParser::GetDecimal() const { return Impl_->GetDecimal(); } @@ -1655,14 +1655,14 @@ TMaybe<TString> TValueParser::GetOptionalJson() const { RET_OPT_VALUE(TString, Json); } -TMaybe<TString> TValueParser::GetOptionalJsonDocument() const { - RET_OPT_VALUE(TString, JsonDocument); -} - -TMaybe<TString> TValueParser::GetOptionalDyNumber() const { - RET_OPT_VALUE(TString, DyNumber); -} - +TMaybe<TString> TValueParser::GetOptionalJsonDocument() const { + RET_OPT_VALUE(TString, JsonDocument); +} + +TMaybe<TString> TValueParser::GetOptionalDyNumber() const { + RET_OPT_VALUE(TString, DyNumber); +} + TMaybe<TDecimalValue> TValueParser::GetOptionalDecimal() const { RET_OPT_VALUE(TDecimalValue, Decimal); } @@ -1931,16 +1931,16 @@ public: GetValue().set_text_value(value); } - void JsonDocument(const TString& value) { - FillPrimitiveType(EPrimitiveType::JsonDocument); - GetValue().set_text_value(value); - } - - void DyNumber(const TString& value) { - FillPrimitiveType(EPrimitiveType::DyNumber); - GetValue().set_text_value(value); - } - + void JsonDocument(const TString& value) { + FillPrimitiveType(EPrimitiveType::JsonDocument); + GetValue().set_text_value(value); + } + + void DyNumber(const TString& value) { + FillPrimitiveType(EPrimitiveType::DyNumber); + GetValue().set_text_value(value); + } + void Decimal(const TDecimalValue& value) { FillDecimalType(value.DecimalType_); GetValue().set_low_128(value.Low_); @@ -2615,18 +2615,18 @@ TDerived& TValueBuilderBase<TDerived>::Json(const TString& value) { } template<typename TDerived> -TDerived& TValueBuilderBase<TDerived>::JsonDocument(const TString& value) { - Impl_->JsonDocument(value); - return static_cast<TDerived&>(*this); -} - -template<typename TDerived> -TDerived& TValueBuilderBase<TDerived>::DyNumber(const TString& value) { - Impl_->DyNumber(value); - return static_cast<TDerived&>(*this); -} - -template<typename TDerived> +TDerived& TValueBuilderBase<TDerived>::JsonDocument(const TString& value) { + 
Impl_->JsonDocument(value); + return static_cast<TDerived&>(*this); +} + +template<typename TDerived> +TDerived& TValueBuilderBase<TDerived>::DyNumber(const TString& value) { + Impl_->DyNumber(value); + return static_cast<TDerived&>(*this); +} + +template<typename TDerived> TDerived& TValueBuilderBase<TDerived>::Decimal(const TDecimalValue& value) { Impl_->Decimal(value); return static_cast<TDerived&>(*this); @@ -2758,17 +2758,17 @@ TDerived& TValueBuilderBase<TDerived>::OptionalJson(const TMaybe<TString>& value SET_OPT_VALUE_MAYBE(Json); } -template<typename TDerived> -TDerived& TValueBuilderBase<TDerived>::OptionalJsonDocument(const TMaybe<TString>& value) { - SET_OPT_VALUE_MAYBE(JsonDocument); -} - -template<typename TDerived> -TDerived& TValueBuilderBase<TDerived>::OptionalDyNumber(const TMaybe<TString>& value) { - SET_OPT_VALUE_MAYBE(DyNumber); -} - +template<typename TDerived> +TDerived& TValueBuilderBase<TDerived>::OptionalJsonDocument(const TMaybe<TString>& value) { + SET_OPT_VALUE_MAYBE(JsonDocument); +} +template<typename TDerived> +TDerived& TValueBuilderBase<TDerived>::OptionalDyNumber(const TMaybe<TString>& value) { + SET_OPT_VALUE_MAYBE(DyNumber); +} + + template<typename TDerived> TDerived& TValueBuilderBase<TDerived>::BeginOptional() { Impl_->BeginOptional(); diff --git a/ydb/public/sdk/cpp/client/ydb_value/value.h b/ydb/public/sdk/cpp/client/ydb_value/value.h index 4a163b9220..3c84b6c618 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/value.h +++ b/ydb/public/sdk/cpp/client/ydb_value/value.h @@ -33,31 +33,31 @@ private: }; enum class EPrimitiveType { - Bool = 0x0006, - Int8 = 0x0007, - Uint8 = 0x0005, - Int16 = 0x0008, - Uint16 = 0x0009, - Int32 = 0x0001, - Uint32 = 0x0002, - Int64 = 0x0003, - Uint64 = 0x0004, - Float = 0x0021, - Double = 0x0020, - Date = 0x0030, - Datetime = 0x0031, - Timestamp = 0x0032, - Interval = 0x0033, - TzDate = 0x0034, - TzDatetime = 0x0035, - TzTimestamp = 0x0036, - String = 0x1001, - Utf8 = 0x1200, - Yson = 0x1201, - 
Json = 0x1202, - Uuid = 0x1203, - JsonDocument = 0x1204, - DyNumber = 0x1302, + Bool = 0x0006, + Int8 = 0x0007, + Uint8 = 0x0005, + Int16 = 0x0008, + Uint16 = 0x0009, + Int32 = 0x0001, + Uint32 = 0x0002, + Int64 = 0x0003, + Uint64 = 0x0004, + Float = 0x0021, + Double = 0x0020, + Date = 0x0030, + Datetime = 0x0031, + Timestamp = 0x0032, + Interval = 0x0033, + TzDate = 0x0034, + TzDatetime = 0x0035, + TzTimestamp = 0x0036, + String = 0x1001, + Utf8 = 0x1200, + Yson = 0x1201, + Json = 0x1202, + Uuid = 0x1203, + JsonDocument = 0x1204, + DyNumber = 0x1302, }; struct TDecimalType { @@ -257,8 +257,8 @@ public: const TString& GetYson() const; const TString& GetJson() const; TDecimalValue GetDecimal() const; - const TString& GetJsonDocument() const; - const TString& GetDyNumber() const; + const TString& GetJsonDocument() const; + const TString& GetDyNumber() const; TMaybe<bool> GetOptionalBool() const; TMaybe<i8> GetOptionalInt8() const; @@ -283,8 +283,8 @@ public: TMaybe<TString> GetOptionalYson() const; TMaybe<TString> GetOptionalJson() const; TMaybe<TDecimalValue> GetOptionalDecimal() const; - TMaybe<TString> GetOptionalJsonDocument() const; - TMaybe<TString> GetOptionalDyNumber() const; + TMaybe<TString> GetOptionalJsonDocument() const; + TMaybe<TString> GetOptionalDyNumber() const; // Optional void OpenOptional(); @@ -360,8 +360,8 @@ public: TDerived& Yson(const TString& value); TDerived& Json(const TString& value); TDerived& Decimal(const TDecimalValue& value); - TDerived& JsonDocument(const TString& value); - TDerived& DyNumber(const TString& value); + TDerived& JsonDocument(const TString& value); + TDerived& DyNumber(const TString& value); TDerived& OptionalBool(const TMaybe<bool>& value); TDerived& OptionalInt8(const TMaybe<i8>& value); @@ -385,8 +385,8 @@ public: TDerived& OptionalUtf8(const TMaybe<TString>& value); TDerived& OptionalYson(const TMaybe<TString>& value); TDerived& OptionalJson(const TMaybe<TString>& value); - TDerived& OptionalJsonDocument(const 
TMaybe<TString>& value); - TDerived& OptionalDyNumber(const TMaybe<TString>& value); + TDerived& OptionalJsonDocument(const TMaybe<TString>& value); + TDerived& OptionalDyNumber(const TMaybe<TString>& value); // Optional TDerived& BeginOptional(); diff --git a/ydb/public/sdk/python/ydb/types.py b/ydb/public/sdk/python/ydb/types.py index 6ae09a5b42..106b732c99 100644 --- a/ydb/public/sdk/python/ydb/types.py +++ b/ydb/public/sdk/python/ydb/types.py @@ -96,7 +96,7 @@ class PrimitiveType(enum.Enum): Interval = _apis.primitive_types.INTERVAL, "int64_value" DyNumber = _apis.primitive_types.DYNUMBER, "text_value", _from_bytes - + def __init__(self, idn, proto_field, to_obj=None, from_obj=None): self._idn_ = idn self._to_obj = to_obj diff --git a/ydb/tests/functional/canonical/canondata/result.json b/ydb/tests/functional/canonical/canondata/result.json index d927ff0f90..ac438e8225 100644 --- a/ydb/tests/functional/canonical/canondata/result.json +++ b/ydb/tests/functional/canonical/canondata/result.json @@ -19,78 +19,78 @@ "uri": "file://test_sql.TestCanonicalFolder1.test_case_dt.sql-result_sets_/dt.sql.results" } }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_from_table.sql-plan]": { - "plan": { + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_from_table.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_from_table.sql-plan_/dynumber_insert_from_table.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_from_table.sql-result_sets]": { - "result_sets": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_from_table.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_from_table.sql-result_sets_/dynumber_insert_from_table.sql.results" - }, - "table_data_ResultTableDyNumber": { + }, + "table_data_ResultTableDyNumber": { "uri": 
"file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_from_table.sql-result_sets_/dynumber_insert_from_table.sql_ResultTableDyNumber.results" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_literal.sql-plan]": { - "plan": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_literal.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_literal.sql-plan_/dynumber_insert_literal.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_literal.sql-result_sets]": { - "result_sets": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_literal.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_literal.sql-result_sets_/dynumber_insert_literal.sql.results" - }, - "table_data_ResultLiteralDyNumber": { + }, + "table_data_ResultLiteralDyNumber": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_literal.sql-result_sets_/dynumber_insert_literal.sql_ResultLiteralDyNumber.results" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_params.sql-plan]": { - "plan": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_params.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_params.sql-plan_/dynumber_insert_params.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_params.sql-result_sets]": { - "result_sets": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/insert_params.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_params.sql-result_sets_/dynumber_insert_params.sql.results" - }, - "table_data_ResultParamsDyNumber": { + }, + "table_data_ResultParamsDyNumber": { "uri": 
"file://test_sql.TestCanonicalFolder1.test_case_dynumber_insert_params.sql-result_sets_/dynumber_insert_params.sql_ResultParamsDyNumber.results" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_from_table.sql-plan]": { - "plan": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/select_from_table.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_from_table.sql-plan_/dynumber_select_from_table.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_from_table.sql-result_sets]": { - "result_sets": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/select_from_table.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_from_table.sql-result_sets_/dynumber_select_from_table.sql.results" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_literal.sql-plan]": { - "plan": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/select_literal.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_literal.sql-plan_/dynumber_select_literal.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_literal.sql-result_sets]": { - "result_sets": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/select_literal.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_literal.sql-result_sets_/dynumber_select_literal.sql.results" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_params.sql-plan]": { - "plan": { + } + }, + "test_sql.TestCanonicalFolder1.test_case[dynumber/select_params.sql-plan]": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_params.sql-plan_/dynumber_select_params.sql.plan" - } - }, - "test_sql.TestCanonicalFolder1.test_case[dynumber/select_params.sql-result_sets]": { - "result_sets": { + } + }, 
+ "test_sql.TestCanonicalFolder1.test_case[dynumber/select_params.sql-result_sets]": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_params.sql-result_sets_/dynumber_select_params.sql.results" }, "result_sets_scan_query": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_dynumber_select_params.sql-result_sets_/dynumber_select_params.sql_scan_query.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[explain.script-script]": { "script": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_explain.script-script_/explain.script.results" @@ -378,87 +378,87 @@ } }, "test_sql.TestCanonicalFolder1.test_case[json/insert_from_table.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_from_table.sql-plan_/json_insert_from_table.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/insert_from_table.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_from_table.sql-result_sets_/json_insert_from_table.sql.results" - }, - "table_data_ResultTableJD": { + }, + "table_data_ResultTableJD": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_from_table.sql-result_sets_/json_insert_from_table.sql_ResultTableJD.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/insert_literal.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_literal.sql-plan_/json_insert_literal.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/insert_literal.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_literal.sql-result_sets_/json_insert_literal.sql.results" - }, - "table_data_ResultLiteralJD": { + }, + "table_data_ResultLiteralJD": { "uri": 
"file://test_sql.TestCanonicalFolder1.test_case_json_insert_literal.sql-result_sets_/json_insert_literal.sql_ResultLiteralJD.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/insert_params.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_params.sql-plan_/json_insert_params.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/insert_params.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_params.sql-result_sets_/json_insert_params.sql.results" - }, - "table_data_ResultParamsJD": { + }, + "table_data_ResultParamsJD": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_insert_params.sql-result_sets_/json_insert_params.sql_ResultParamsJD.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/json_query.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_json_query.sql-plan_/json_json_query.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/json_query.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_json_query.sql-result_sets_/json_json_query.sql.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_from_table.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_from_table.sql-plan_/json_select_from_table.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_from_table.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_from_table.sql-result_sets_/json_select_from_table.sql.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_literal.sql-plan]": { - "plan": { + "plan": { "uri": 
"file://test_sql.TestCanonicalFolder1.test_case_json_select_literal.sql-plan_/json_select_literal.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_literal.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_literal.sql-result_sets_/json_select_literal.sql.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_params.sql-plan]": { - "plan": { + "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_params.sql-plan_/json_select_params.sql.plan" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[json/select_params.sql-result_sets]": { - "result_sets": { + "result_sets": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_params.sql-result_sets_/json_select_params.sql.results" }, "result_sets_scan_query": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_json_select_params.sql-result_sets_/json_select_params.sql_scan_query.results" - } - }, + } + }, "test_sql.TestCanonicalFolder1.test_case[order_by/order_by_pk.sql-plan]": { "plan": { "uri": "file://test_sql.TestCanonicalFolder1.test_case_order_by_order_by_pk.sql-plan_/order_by_order_by_pk.sql.plan" diff --git a/ydb/tests/functional/canonical/sql/InputDyNumber.data b/ydb/tests/functional/canonical/sql/InputDyNumber.data index 91164a35c9..cd270abc13 100644 --- a/ydb/tests/functional/canonical/sql/InputDyNumber.data +++ b/ydb/tests/functional/canonical/sql/InputDyNumber.data @@ -1,5 +1,5 @@ -[ -{"Key": "1.23", "Value": "{\"name\": \"Jake\", \"age\": 50}"}, -{"Key": "1.32", "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, -{"Key": "1.5", "Value": "{\"name\": \"Tom\", \"age\": 3}"} -] +[ +{"Key": "1.23", "Value": "{\"name\": \"Jake\", \"age\": 50}"}, +{"Key": "1.32", "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, +{"Key": "1.5", "Value": "{\"name\": \"Tom\", \"age\": 3}"} +] diff --git a/ydb/tests/functional/canonical/sql/InputJD.data 
b/ydb/tests/functional/canonical/sql/InputJD.data index 1af79a3094..7e0a485521 100644 --- a/ydb/tests/functional/canonical/sql/InputJD.data +++ b/ydb/tests/functional/canonical/sql/InputJD.data @@ -1,5 +1,5 @@ -[ -{"Key": 1, "Value": "{\"name\": \"Jake\", \"age\": 50}"}, -{"Key": 2, "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, -{"Key": 3, "Value": "{\"name\": \"Tom\", \"age\": 3}"} -] +[ +{"Key": 1, "Value": "{\"name\": \"Jake\", \"age\": 50}"}, +{"Key": 2, "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, +{"Key": 3, "Value": "{\"name\": \"Tom\", \"age\": 3}"} +] diff --git a/ydb/tests/functional/canonical/sql/dynumber/create_insert_from_table.scheme b/ydb/tests/functional/canonical/sql/dynumber/create_insert_from_table.scheme index 76206ff7e6..bd1355ac00 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/create_insert_from_table.scheme +++ b/ydb/tests/functional/canonical/sql/dynumber/create_insert_from_table.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultTableDyNumber ( - Key DyNumber, - Value Json, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultTableDyNumber ( + Key DyNumber, + Value Json, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/dynumber/create_insert_literal.scheme b/ydb/tests/functional/canonical/sql/dynumber/create_insert_literal.scheme index b7fc5b2ffa..50ca13eb38 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/create_insert_literal.scheme +++ b/ydb/tests/functional/canonical/sql/dynumber/create_insert_literal.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultLiteralDyNumber ( - Key DyNumber, - Value Json, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultLiteralDyNumber ( + Key DyNumber, + Value Json, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/dynumber/create_insert_params_table.scheme b/ydb/tests/functional/canonical/sql/dynumber/create_insert_params_table.scheme index 60433e461c..80b960f7e9 100644 --- 
a/ydb/tests/functional/canonical/sql/dynumber/create_insert_params_table.scheme +++ b/ydb/tests/functional/canonical/sql/dynumber/create_insert_params_table.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultParamsDyNumber ( - Key DyNumber, - Value Json, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultParamsDyNumber ( + Key DyNumber, + Value Json, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/dynumber/insert_from_table.sql b/ydb/tests/functional/canonical/sql/dynumber/insert_from_table.sql index 1a1647f211..eb039244b7 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/insert_from_table.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/insert_from_table.sql @@ -1,4 +1,4 @@ ---!syntax_v1 - -INSERT INTO ResultTableDyNumber -SELECT Key, Value FROM InputDyNumber; +--!syntax_v1 + +INSERT INTO ResultTableDyNumber +SELECT Key, Value FROM InputDyNumber; diff --git a/ydb/tests/functional/canonical/sql/dynumber/insert_literal.sql b/ydb/tests/functional/canonical/sql/dynumber/insert_literal.sql index 2b6164f0a0..85603a4013 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/insert_literal.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/insert_literal.sql @@ -1,6 +1,6 @@ ---!syntax_v1 - -INSERT INTO ResultLiteralDyNumber (Key, Value) -VALUES - (DyNumber("1.23"), Json(@@{"name": "George", "age": 23}@@)), - (DyNumber("4.56"), Json(@@{"name": "Alex", "age": 65}@@)); +--!syntax_v1 + +INSERT INTO ResultLiteralDyNumber (Key, Value) +VALUES + (DyNumber("1.23"), Json(@@{"name": "George", "age": 23}@@)), + (DyNumber("4.56"), Json(@@{"name": "Alex", "age": 65}@@)); diff --git a/ydb/tests/functional/canonical/sql/dynumber/insert_params.sql b/ydb/tests/functional/canonical/sql/dynumber/insert_params.sql index 5d01b293ce..c4a743b38c 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/insert_params.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/insert_params.sql @@ -1,6 +1,6 @@ ---!syntax_v1 - -DECLARE $data AS 
List<Struct<Key: DyNumber, Value: Json>>; - -INSERT INTO ResultParamsDyNumber -SELECT Key, Value FROM AS_TABLE($data); +--!syntax_v1 + +DECLARE $data AS List<Struct<Key: DyNumber, Value: Json>>; + +INSERT INTO ResultParamsDyNumber +SELECT Key, Value FROM AS_TABLE($data); diff --git a/ydb/tests/functional/canonical/sql/dynumber/select_from_table.sql b/ydb/tests/functional/canonical/sql/dynumber/select_from_table.sql index c8fbb47960..6ba9423717 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/select_from_table.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/select_from_table.sql @@ -1,3 +1,3 @@ ---!syntax_v1 - -SELECT * FROM InputDyNumber; +--!syntax_v1 + +SELECT * FROM InputDyNumber; diff --git a/ydb/tests/functional/canonical/sql/dynumber/select_literal.sql b/ydb/tests/functional/canonical/sql/dynumber/select_literal.sql index fa355bc8b5..cc4fa1fce3 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/select_literal.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/select_literal.sql @@ -1,3 +1,3 @@ ---!syntax_v1 - -SELECT DyNumber("123.456");
\ No newline at end of file +--!syntax_v1 + +SELECT DyNumber("123.456");
\ No newline at end of file diff --git a/ydb/tests/functional/canonical/sql/dynumber/select_params.sql b/ydb/tests/functional/canonical/sql/dynumber/select_params.sql index e55b582b0f..194ff85ec9 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/select_params.sql +++ b/ydb/tests/functional/canonical/sql/dynumber/select_params.sql @@ -1,6 +1,6 @@ ---!syntax_v1 - -DECLARE $param1 AS DyNumber; -DECLARE $param2 AS DyNumber; - +--!syntax_v1 + +DECLARE $param1 AS DyNumber; +DECLARE $param2 AS DyNumber; + SELECT $param1, $param2; diff --git a/ydb/tests/functional/canonical/sql/dynumber/test_config.json b/ydb/tests/functional/canonical/sql/dynumber/test_config.json index 0d07f8811c..39b457da16 100644 --- a/ydb/tests/functional/canonical/sql/dynumber/test_config.json +++ b/ydb/tests/functional/canonical/sql/dynumber/test_config.json @@ -1,25 +1,25 @@ -{ - "insert_from_table.sql": { - "compare_tables": ["ResultTableDyNumber"] - }, - "insert_literal.sql": { - "compare_tables": ["ResultLiteralDyNumber"] - }, - "insert_params.sql": { - "compare_tables": ["ResultParamsDyNumber"], - "parameters": { - "$data": [ - {"Key": "1.23", "Value": "{\"name\": \"Jake\", \"age\": 50}"}, - {"Key": "1.32", "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, - {"Key": "1.5", "Value": "{\"name\": \"Tom\", \"age\": 3}"} - ] - } - }, - "select_params.sql": { +{ + "insert_from_table.sql": { + "compare_tables": ["ResultTableDyNumber"] + }, + "insert_literal.sql": { + "compare_tables": ["ResultLiteralDyNumber"] + }, + "insert_params.sql": { + "compare_tables": ["ResultParamsDyNumber"], + "parameters": { + "$data": [ + {"Key": "1.23", "Value": "{\"name\": \"Jake\", \"age\": 50}"}, + {"Key": "1.32", "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, + {"Key": "1.5", "Value": "{\"name\": \"Tom\", \"age\": 3}"} + ] + } + }, + "select_params.sql": { "check_scan_query": true, - "parameters": { - "$param1": "-345.678", - "$param2": "0.00001" - } - } -} + "parameters": { + "$param1": "-345.678", + 
"$param2": "0.00001" + } + } +} diff --git a/ydb/tests/functional/canonical/sql/input_dynumber.table b/ydb/tests/functional/canonical/sql/input_dynumber.table index 5d83b2ea8e..4ca8a31bc2 100644 --- a/ydb/tests/functional/canonical/sql/input_dynumber.table +++ b/ydb/tests/functional/canonical/sql/input_dynumber.table @@ -1,10 +1,10 @@ -path: "/local/InputDyNumber" -columns { - name: "Key" - type { optional_type { item { type_id: DYNUMBER } } } -} -columns { - name: "Value" - type { optional_type { item { type_id: JSON } } } -} -primary_key: "Key" +path: "/local/InputDyNumber" +columns { + name: "Key" + type { optional_type { item { type_id: DYNUMBER } } } +} +columns { + name: "Value" + type { optional_type { item { type_id: JSON } } } +} +primary_key: "Key" diff --git a/ydb/tests/functional/canonical/sql/input_jd.table b/ydb/tests/functional/canonical/sql/input_jd.table index 81dddcad5e..d390b3127d 100644 --- a/ydb/tests/functional/canonical/sql/input_jd.table +++ b/ydb/tests/functional/canonical/sql/input_jd.table @@ -1,10 +1,10 @@ -path: "/local/InputJD" -columns { - name: "Key" - type { optional_type { item { type_id: INT32 } } } -} -columns { - name: "Value" - type { optional_type { item { type_id: JSON_DOCUMENT } } } -} -primary_key: "Key" +path: "/local/InputJD" +columns { + name: "Key" + type { optional_type { item { type_id: INT32 } } } +} +columns { + name: "Value" + type { optional_type { item { type_id: JSON_DOCUMENT } } } +} +primary_key: "Key" diff --git a/ydb/tests/functional/canonical/sql/json/create_insert_from_table.scheme b/ydb/tests/functional/canonical/sql/json/create_insert_from_table.scheme index 542267a493..62c194d990 100644 --- a/ydb/tests/functional/canonical/sql/json/create_insert_from_table.scheme +++ b/ydb/tests/functional/canonical/sql/json/create_insert_from_table.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultTableJD ( - Key Int32, - Value JsonDocument, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultTableJD ( + 
Key Int32, + Value JsonDocument, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/json/create_insert_literal.scheme b/ydb/tests/functional/canonical/sql/json/create_insert_literal.scheme index f4351b67c8..d70b223d59 100644 --- a/ydb/tests/functional/canonical/sql/json/create_insert_literal.scheme +++ b/ydb/tests/functional/canonical/sql/json/create_insert_literal.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultLiteralJD ( - Key Int32, - Value JsonDocument, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultLiteralJD ( + Key Int32, + Value JsonDocument, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/json/create_insert_params_table.scheme b/ydb/tests/functional/canonical/sql/json/create_insert_params_table.scheme index fa8822afcb..7a7533b1fe 100644 --- a/ydb/tests/functional/canonical/sql/json/create_insert_params_table.scheme +++ b/ydb/tests/functional/canonical/sql/json/create_insert_params_table.scheme @@ -1,6 +1,6 @@ ---!syntax_v1 -CREATE TABLE ResultParamsJD ( - Key Int32, - Value JsonDocument, - PRIMARY KEY (Key) -); +--!syntax_v1 +CREATE TABLE ResultParamsJD ( + Key Int32, + Value JsonDocument, + PRIMARY KEY (Key) +); diff --git a/ydb/tests/functional/canonical/sql/json/insert_from_table.sql b/ydb/tests/functional/canonical/sql/json/insert_from_table.sql index 86b136225e..e26c367df5 100644 --- a/ydb/tests/functional/canonical/sql/json/insert_from_table.sql +++ b/ydb/tests/functional/canonical/sql/json/insert_from_table.sql @@ -1,4 +1,4 @@ ---!syntax_v1 - -INSERT INTO ResultTableJD -SELECT Key, Value FROM InputJD; +--!syntax_v1 + +INSERT INTO ResultTableJD +SELECT Key, Value FROM InputJD; diff --git a/ydb/tests/functional/canonical/sql/json/insert_literal.sql b/ydb/tests/functional/canonical/sql/json/insert_literal.sql index de091779b3..f7a9511036 100644 --- a/ydb/tests/functional/canonical/sql/json/insert_literal.sql +++ b/ydb/tests/functional/canonical/sql/json/insert_literal.sql @@ -1,6 +1,6 @@ 
---!syntax_v1 - -INSERT INTO ResultLiteralJD (Key, Value) -VALUES - (4, JsonDocument(@@{"name": "George", "age": 23}@@)), - (5, JsonDocument(@@{"name": "Alex", "age": 65}@@)); +--!syntax_v1 + +INSERT INTO ResultLiteralJD (Key, Value) +VALUES + (4, JsonDocument(@@{"name": "George", "age": 23}@@)), + (5, JsonDocument(@@{"name": "Alex", "age": 65}@@)); diff --git a/ydb/tests/functional/canonical/sql/json/insert_params.sql b/ydb/tests/functional/canonical/sql/json/insert_params.sql index 3492e5adfe..3b413b514d 100644 --- a/ydb/tests/functional/canonical/sql/json/insert_params.sql +++ b/ydb/tests/functional/canonical/sql/json/insert_params.sql @@ -1,6 +1,6 @@ ---!syntax_v1 - -DECLARE $data AS List<Struct<Key: Int32, Value: JsonDocument>>; - -INSERT INTO ResultParamsJD -SELECT Key, Value FROM AS_TABLE($data); +--!syntax_v1 + +DECLARE $data AS List<Struct<Key: Int32, Value: JsonDocument>>; + +INSERT INTO ResultParamsJD +SELECT Key, Value FROM AS_TABLE($data); diff --git a/ydb/tests/functional/canonical/sql/json/json_query.sql b/ydb/tests/functional/canonical/sql/json/json_query.sql index a4fed0b2de..1ba91e8f80 100644 --- a/ydb/tests/functional/canonical/sql/json/json_query.sql +++ b/ydb/tests/functional/canonical/sql/json/json_query.sql @@ -1,12 +1,12 @@ ---!syntax_v1 - -$from_jd = JSON_QUERY(JsonDocument("[1, 2, 3, 4]"), "$"); -$from_json = JSON_QUERY(Json(@@{ - "key": "value" -}@@), "$"); - -SELECT - FormatType(TypeOf($from_jd)), - $from_jd, - FormatType(TypeOf($from_json)), - $from_json;
\ No newline at end of file +--!syntax_v1 + +$from_jd = JSON_QUERY(JsonDocument("[1, 2, 3, 4]"), "$"); +$from_json = JSON_QUERY(Json(@@{ + "key": "value" +}@@), "$"); + +SELECT + FormatType(TypeOf($from_jd)), + $from_jd, + FormatType(TypeOf($from_json)), + $from_json;
\ No newline at end of file diff --git a/ydb/tests/functional/canonical/sql/json/select_from_table.sql b/ydb/tests/functional/canonical/sql/json/select_from_table.sql index 0981186e64..95013e51d5 100644 --- a/ydb/tests/functional/canonical/sql/json/select_from_table.sql +++ b/ydb/tests/functional/canonical/sql/json/select_from_table.sql @@ -1,3 +1,3 @@ ---!syntax_v1 - -SELECT * FROM InputJD; +--!syntax_v1 + +SELECT * FROM InputJD; diff --git a/ydb/tests/functional/canonical/sql/json/select_literal.sql b/ydb/tests/functional/canonical/sql/json/select_literal.sql index 4feb342464..ece8bb9396 100644 --- a/ydb/tests/functional/canonical/sql/json/select_literal.sql +++ b/ydb/tests/functional/canonical/sql/json/select_literal.sql @@ -1,3 +1,3 @@ ---!syntax_v1 - -SELECT JsonDocument(@@{"name": "George", "age": 23}@@);
\ No newline at end of file +--!syntax_v1 + +SELECT JsonDocument(@@{"name": "George", "age": 23}@@);
\ No newline at end of file diff --git a/ydb/tests/functional/canonical/sql/json/select_params.sql b/ydb/tests/functional/canonical/sql/json/select_params.sql index 5b4a323f6c..b6ad7e9855 100644 --- a/ydb/tests/functional/canonical/sql/json/select_params.sql +++ b/ydb/tests/functional/canonical/sql/json/select_params.sql @@ -1,6 +1,6 @@ ---!syntax_v1 - -DECLARE $param1 AS JsonDocument; -DECLARE $param2 AS JsonDocument; - -SELECT $param1, $param2;
\ No newline at end of file +--!syntax_v1 + +DECLARE $param1 AS JsonDocument; +DECLARE $param2 AS JsonDocument; + +SELECT $param1, $param2;
\ No newline at end of file diff --git a/ydb/tests/functional/canonical/sql/json/test_config.json b/ydb/tests/functional/canonical/sql/json/test_config.json index f126be8837..515df92ae6 100644 --- a/ydb/tests/functional/canonical/sql/json/test_config.json +++ b/ydb/tests/functional/canonical/sql/json/test_config.json @@ -1,25 +1,25 @@ -{ - "insert_from_table.sql": { - "compare_tables": ["ResultTableJD"] - }, - "insert_literal.sql": { - "compare_tables": ["ResultLiteralJD"] - }, - "insert_params.sql": { - "compare_tables": ["ResultParamsJD"], - "parameters": { - "$data": [ - {"Key": 1, "Value": "{\"name\": \"Jake\", \"age\": 50}"}, - {"Key": 2, "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, - {"Key": 3, "Value": "{\"name\": \"Tom\", \"age\": 3}"} - ] - } - }, - "select_params.sql": { +{ + "insert_from_table.sql": { + "compare_tables": ["ResultTableJD"] + }, + "insert_literal.sql": { + "compare_tables": ["ResultLiteralJD"] + }, + "insert_params.sql": { + "compare_tables": ["ResultParamsJD"], + "parameters": { + "$data": [ + {"Key": 1, "Value": "{\"name\": \"Jake\", \"age\": 50}"}, + {"Key": 2, "Value": "{\"name\": \"Jessica\", \"age\": 35}"}, + {"Key": 3, "Value": "{\"name\": \"Tom\", \"age\": 3}"} + ] + } + }, + "select_params.sql": { "check_scan_query": true, - "parameters": { - "$param1": "[1, 2, 3]", - "$param2": "{\"name\": \"Tass\"}" - } - } -} + "parameters": { + "$param1": "[1, 2, 3]", + "$param2": "{\"name\": \"Tass\"}" + } + } +} diff --git a/ydb/tests/functional/canonical/test_sql.py b/ydb/tests/functional/canonical/test_sql.py index af5cdc2b9c..b6302c41a7 100644 --- a/ydb/tests/functional/canonical/test_sql.py +++ b/ydb/tests/functional/canonical/test_sql.py @@ -307,29 +307,29 @@ class BaseCanonicalTest(object): ) ) - def do_execute(self, session, query, parameters=None): - if parameters is None: - parameters = {} - - prepared_query = session.prepare(query) - result_sets = session.transaction(ydb.SerializableReadWrite()).execute( - prepared_query, - 
commit_tx=True, - parameters=parameters, - ) + def do_execute(self, session, query, parameters=None): + if parameters is None: + parameters = {} + + prepared_query = session.prepare(query) + result_sets = session.transaction(ydb.SerializableReadWrite()).execute( + prepared_query, + commit_tx=True, + parameters=parameters, + ) return result_sets @pretty_error_report - def serializable_execute(self, query, parameters=None): - if parameters is None: - parameters = {} - + def serializable_execute(self, query, parameters=None): + if parameters is None: + parameters = {} + return self.wrap_result_sets( self.pool.retry_operation_sync( lambda session: self.do_execute( session, query, - parameters, + parameters, ) ) ) @@ -370,11 +370,11 @@ class BaseCanonicalTest(object): def wrap_rows(self, columns, rows): return [ - { - column.name: self.wrap_value(row[column.name]) - for column in columns - } - for row in rows + { + column.name: self.wrap_value(row[column.name]) + for column in columns + } + for row in rows ] def wrap_result_set(self, result_set): @@ -390,7 +390,7 @@ class BaseCanonicalTest(object): if not os.path.exists(cfg_json): return {} cfg = json.loads(self.read_query_text(cfg_json)) - return cfg.get(fl, {}) + return cfg.get(fl, {}) def read_table(self, table): fpath = os.path.join(self.prefix, table) @@ -499,7 +499,7 @@ class BaseCanonicalTest(object): assert self.pretty_json(self.format_plan(plan)) == self.pretty_json(self.format_plan(new_engine_plan)) elif kind == 'result_sets': - result_sets = self.serializable_execute(query, config.get('parameters', {})) + result_sets = self.serializable_execute(query, config.get('parameters', {})) canons['result_sets'] = self.canonical_results(query_name, self.pretty_json(result_sets)) check_scan_query = config.get('check_scan_query', False) |